In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd

import psycopg2

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('notebook', font_scale=1.2)

import matplotlib.style
matplotlib.style.use('ggplot')
%matplotlib inline

from IPython.display import display

In [2]:
import pymc3 as pm
import theano
import theano.tensor as T
from scipy.stats.stats import pearsonr

import sklearn.model_selection

In [3]:
# create a database connection
sqluser = 'mimic'
dbname = 'mimic'
schema_name = 'mimiciii'

cur = None

In [4]:
if cur: 
    cur.close()
    con.close()

con = psycopg2.connect(dbname = dbname, user = sqluser)
cur = con.cursor()
cur.execute('SET search_path to ' + schema_name)

To recap, variables of interest include: 

+ Echo parameters (LV systolic, RV size, pulmonary hypertension)
+ Fluids (day 1: missing term = 1 only if missing on day 1, day1-2: missing term = 1 if missing on day 1 or 2, day1-3: missing term = 1 if missing on day 1, day 2 or day 3)
+ Interactions: fluids x echo variables, fluids x CHF, echo x CHF
+ Covariates: Age, gender, BMI, BMI missing, mech vent, CHF, MDRD, Cr missing, apache score, lactate, lactate missing, ICU type, pressors indicator variable

In [5]:
query = """
SELECT * FROM icu_features
"""
ef_ = pd.read_sql_query(query, con)
ef_.head()

Unnamed: 0,icustay_id,hadm_id,subject_id,age,gender,height,weight,ethnicity,insurance,filter_vaso,...,pc_bronch,pc_cath,pc_echo,pc_pressor,pc_rhc,pc_thora,pc_vent,passed_filters,use_record,bmi
0,200001,152234,55973,22290 days 19:06:12,F,167.851667,27.669135,ASIAN - ASIAN INDIAN,Medicare,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,9.820741
1,200003,163557,27513,17625 days 19:50:04,M,177.8,78.224998,WHITE,Private,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,24.744692
2,200006,189514,10950,19736 days 11:28:14,M,165.1,82.400002,OTHER,Medicaid,False,...,,,,,,,,,,30.229647
3,200007,129310,20707,15818 days 10:03:37,M,177.8,126.0,WHITE,Private,False,...,,,,,,,,,,39.857223
4,200009,129607,29904,17353 days 10:34:32,F,160.02,85.833331,WHITE,Private,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,33.520264


In [6]:
len(ef_)

61532

In [7]:
use_record = (ef_['use_record'] == 1)
has_echo = ~ef_['ea_key'].isnull()
F = use_record & has_echo

In [8]:
ef_ = ef_.loc[F]

len(ef_)

3686

In [9]:
import statsmodels.api as sm

# Preprocessing

## Add MDRD to table

In [10]:
def mdrd(data):
    creat = data.lab_creatinine
    age = data.age/np.timedelta64('1', 'Y')
    age.loc[age<0] = age.max()
    gender = data.gender.apply(lambda x: 0.742 if x == 'F' else 1)
    ethnicity = data.ethnicity.apply(lambda x: 1.212 if 'AFRICAN AMERICAN' in x else 1)
    return 175*creat**(-1.154) * age**(-0.203) * gender * ethnicity

In [11]:
value = mdrd(ef_)
ef_['mdrd'] = value
discrete = pd.cut(value, [0, 15, 30, 44, 60, 90, 1000000], labels=[5, 4, 3, 2, 1, 0], include_lowest=True)
ef_['mdrd_discrete'] = discrete
discrete.value_counts()

0    905
1    783
2    597
4    579
3    518
5    246
dtype: int64

## fluid_day1

In [36]:
def normalize(x):
    x = x.copy()
    I = ~np.isnan(x)
    x[I] = (x[I] - x[I].mean())/x[I].std()
    return x

choose = (ef_.fb_day1_balance_ml > -8000) & (ef_.fb_day1_balance_ml < 8000)
print(sum(choose))
ef = ef_.loc[choose]

X = pd.DataFrame()

# build the features table
age = ef.age/np.timedelta64('1', 'Y') # time in years
age_masked = age < 0 # if age is less than 0, then the person is above 89 (90?)
age = normalize(age)
age[age_masked] = 0
X['age'] = age
X['age_over_90'] = age_masked.astype(float)

gender = ef.gender
gender = (gender == 'F').astype(float)
X['gender'] = gender

bmi = ef.bmi
bmi_missing = np.isnan(bmi)
bmi = normalize(bmi)
bmi[bmi_missing] = 0
X['bmi'] = bmi
X['bmi_missing'] = bmi_missing.astype(float)

mech_vent = ef.vf_first_day_vent.astype(float)
X['mech_vent'] = mech_vent

chf = ef.ex_congestive_heart_failure.astype(float)
X['CHF'] = chf

mdrd = ef.mdrd
mdrd_missing = np.isnan(mdrd)
mdrd = normalize(mdrd)
mdrd[mdrd_missing] = 0
X['mdrd'] = mdrd

# MDRD missing is already largely covered by BMI missing
# the only other missing values come from missing creatinine lab
# so add that
creatinine_missing = np.isnan(ef.lab_creatinine)
X['creatinine_missing'] = creatinine_missing.astype(float)

apsiii = ef.apsiii
#apsiii_missing = np.isnan(apsiii)
apsiii = normalize(apsiii)
#apsiii[apsiii_missing] = 0
X['apsiii'] = apsiii
#X['apsiii_missing'] = apsiii_missing.astype(float)

no_echo = np.isnan(ef.ea_key)
#X['no_echo'] = no_echo.astype(float)

# -3 cannot assess
# -2 ?depressed
# -1 hyper
# 0 normal
# 1 mildly depressed
# 2 moderately depressed
# 3 severe
lvsys = ef.ea_lv_systolic
lv_missing = (((np.isnan(lvsys)) | (lvsys < -2)) & ~no_echo).astype(float)
lv_hyperdynamic = (lvsys == -1).astype(float)
lv_normal = (lvsys == 0).astype(float)
lv_mild = (lvsys == 1).astype(float)
lv_depressed = ((lvsys == 2) | (lvsys == -2)).astype(float)
lv_sev_depressed = (lvsys == 3).astype(float)
X['lv_missing'] = lv_missing
X['lv_normal'] = lv_normal
X['lv_mild'] = lv_mild
X['lv_hyper'] = lv_hyperdynamic
X['lv_depressed'] = lv_depressed
X['lv_severe'] = lv_sev_depressed

# -3 cannot assess
# -2 PA systolic hypertension
# 0 normal
# 1 mild
# 2 moderate
# 3 severe
tvpulm = ef.ea_tv_pulm_htn
tv_missing = (((np.isnan(tvpulm)) | (tvpulm < -2)) & ~no_echo).astype(float)
tv_normal = (tvpulm == 0).astype(float)
tv_abnormal = ((tvpulm == -2) | (tvpulm > 0)).astype(float)
X['plm_htn_missing'] = tv_missing
X['plm_htn_normal'] = tv_normal
X['plm_htn_abnormal'] = tv_abnormal

# -3 cannot assess 
# -2 dilated
# -1 small
# 0 normal
# 1 mildly dilated
# 2 moderately dilated
rvcavity = ef.ea_rv_cavity
rvcav_missing = (((np.isnan(rvcavity) | (rvcavity < -2))) & ~no_echo).astype(float)
rvcav_normal = (rvcavity == 0).astype(float)
rvcav_small = (rvcavity == -1).astype(float)
rvcav_dilated = ((rvcavity == -2) | (rvcavity > 0)).astype(float)
X['rvcav_missing'] = rvcav_missing
X['rvcav_normal'] = rvcav_normal
X['rvcav_small'] = rvcav_small
X['rvcav_dilated'] = rvcav_dilated

lactate = ef.lab_lactate
lactate_missing = np.isnan(lactate)
lactate = normalize(lactate)
lactate[lactate_missing] = 0
X['lactate'] = lactate
X['lactate_missing'] = lactate_missing.astype(float)

## which ICU was treating the patient
sicu = ef.st_sicu.astype(float)
sicu[np.isnan(sicu)] = 0
nsicu = ef.st_nsicu.astype(float)
nsicu[np.isnan(nsicu)] = 0
micu = ef.st_micu.astype(float)
micu[np.isnan(micu)] = 0
#X['sicu'] = sicu
#X['nsicu'] = nsicu
X['micu'] = micu

## on vasopressors
on_vaso = ef.filter_vaso.astype(float)
X['on_vaso'] = on_vaso

## fluids
fluid_day1 = ef.fb_day1_balance_ml.values.copy()
f1_missing = np.isnan(fluid_day1)
fluid_day1[~f1_missing] = normalize(fluid_day1[~f1_missing])
fluid_day1[f1_missing] = 0
X['fluid_day1'] = fluid_day1
X['fluid_day1_missing'] = f1_missing.astype(float)

fluid_day2 = ef.fb_day2_balance_ml.values.copy()
f2_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2)
fluid_day2[~f2_missing] = normalize(fluid_day2[~f2_missing])
fluid_day2[f2_missing] = 0
X['fluid_day2'] = fluid_day2
X['fluid_day2_missing'] = f2_missing.astype(float)

fluid_day3 = ef.fb_day3_balance_ml.values.copy()
f3_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2) | np.isnan(fluid_day3)
fluid_day3[~f3_missing] = normalize(fluid_day3[~f3_missing])
fluid_day3[f3_missing] = 0
X['fluid_day3'] = fluid_day3
X['fluid_day3_missing'] = f3_missing.astype(float)

y = (ef.dod - ef.intime) < np.timedelta64(30, 'D')

echo_features = {
    'lv': ['lv_missing', 'lv_mild', 'lv_hyper', 'lv_depressed', 'lv_severe'], 
    'plm_htn': ['plm_htn_missing', 'plm_htn_abnormal'], 
    'rvcav': ['rvcav_missing', 'rvcav_small', 'rvcav_dilated'], 
}
echo_features['all'] = echo_features['lv'] + echo_features['plm_htn'] + echo_features['rvcav']


fluid_features = {
    'fluid_day1': ['fluid_day1'], 
    'fluid_day2': ['fluid_day2'], 
    'fluid_day3': ['fluid_day3'],
}
fluid_missing = {
    'fluid_day1': ['fluid_day1_missing'], 
    'fluid_day2': ['fluid_day2_missing'], 
    'fluid_day3': ['fluid_day3_missing'],
}

# Age, gender, BMI, BMI missing, mech vent, CHF, MDRD, Cr missing, apache score, lactate, 
#lactate missing, ICU type, pressors indicator variable
covariates = ['age', 'age_over_90', # age
    "gender", # gender
    "bmi", "bmi_missing", # bmi
    "mech_vent", # mech_vent
    "CHF", # CHF
    "mdrd", "creatinine_missing", # MDRD
    "apsiii", # apache score
    "lactate", "lactate_missing", # lactate
    "micu", #"sicu", "nsicu", "micu", # icu type
    "on_vaso", # pressors indicator variable
]

def make_data(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    return pd.concat([Xf, Xe, Xc], axis = 1)

def make_interactions(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    # Interactions: fluids x echo variables, fluids x CHF, echo x CHF
    # Fluids x echo variables
    Xfe = {}
    for cf in Xf.columns: 
        for ce in Xe.columns: 
            Xfe["{}*{}".format(cf, ce)] = Xf[cf]*Xe[ce]
    Xfe = pd.DataFrame(Xfe)

    # Fluids x CHF
    Xfc = {}
    for cf in Xf.columns:
        Xfc["{}*CHF".format(cf)] = Xf[cf] * Xc['CHF']
    Xfc = pd.DataFrame(Xfc)

    # Echo x CHF
    Xec = {}
    for ce in Xe.columns: 
        Xec["{}*CHF".format(ce)] = Xe[ce] * Xc['CHF']
    Xec = pd.DataFrame(Xec)

    return pd.concat([Xfe, Xfc, Xec], axis = 1)

X.shape, y.shape

3307


((3307, 33), (3307,))

## Covariates only

In [38]:
fluid_fs = 'fluid_day1'
echo_fs = 'lv'

Xc = X[covariates]
X_ = Xc
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.526396
         Iterations 8
                           fluid_day1:lv
Model:                Logit            Pseudo R-squared: 0.122     
Dependent Variable:   y                AIC:              3511.5844 
Date:                 2017-05-25 18:52 BIC:              3603.1413 
No. Observations:     3307             Log-Likelihood:   -1740.8   
Df Model:             14               LL-Null:          -1983.2   
Df Residuals:         3292             LLR p-value:      1.5739e-94
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2368   0.1568 -14.2639 0.0000 -2.5441 -1.9294
age                 1.9879   0.2696   7.3

## All linear terms + covariates

In [39]:
fluid_fs = 'fluid_day1'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xe, Xc], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.523442
         Iterations 7
                           fluid_day1:lv
Model:                Logit            Pseudo R-squared: 0.127     
Dependent Variable:   y                AIC:              3504.0456 
Date:                 2017-05-25 18:52 BIC:              3632.2253 
No. Observations:     3307             Log-Likelihood:   -1731.0   
Df Model:             20               LL-Null:          -1983.2   
Df Residuals:         3286             LLR p-value:      3.6692e-94
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       7.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.3007   0.1602 -14.3643 0.0000 -2.6146 -1.9868
fluid_day1         -0.0927   0.0463  -2.0

## lv_hyper only

In [40]:
fluid_fs = 'fluid_day1'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_hyper'] = Xe['lv_hyper']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.524540
         Iterations 7
                           fluid_day1:lv
Model:                Logit            Pseudo R-squared: 0.125     
Dependent Variable:   y                AIC:              3503.3059 
Date:                 2017-05-25 18:52 BIC:              3607.0705 
No. Observations:     3307             Log-Likelihood:   -1734.7   
Df Model:             16               LL-Null:          -1983.2   
Df Residuals:         3290             LLR p-value:      1.4051e-95
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       7.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2776   0.1583 -14.3873 0.0000 -2.5879 -1.9673
fluid_day1         -0.0960   0.0462  -2.0

## All interaction terms + covariates

In [41]:
X_ = make_data(fluid_fs, echo_fs)
X_ = pd.concat([X_, make_interactions(fluid_fs, echo_fs)], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.521152
         Iterations 7
                             fluid_day1:lv
Model:                 Logit              Pseudo R-squared:   0.131     
Dependent Variable:    y                  AIC:                3510.8990 
Date:                  2017-05-25 18:53   BIC:                3706.2205 
No. Observations:      3307               Log-Likelihood:     -1723.4   
Df Model:              31                 LL-Null:            -1983.2   
Df Residuals:          3275               LLR p-value:        5.1885e-90
Converged:             1.0000             Scale:              1.0000    
No. Iterations:        7.0000                                           
------------------------------------------------------------------------
                         Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
------------------------------------------------------------------------
const                   -2.3094   0.1621 -14.2462 0.

## lv_mild + lv_hyper

In [58]:
fluid_fs = 'fluid_day1'
echo_fs = 'lv'

cv = ['age', 'age_over_90', 'mech_vent', 'mdrd', 'CHF', 'apsiii', 'lactate', 'micu', 'on_vaso']
Xc = X[cv]
Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.532253
         Iterations 8
                           fluid_day1:lv
Model:                Logit            Pseudo R-squared: 0.105     
Dependent Variable:   y                AIC:              3151.9409 
Date:                 2017-05-25 18:57 BIC:              3253.6420 
No. Observations:     2929             Log-Likelihood:   -1559.0   
Df Model:             16               LL-Null:          -1742.3   
Df Residuals:         2912             LLR p-value:      3.5034e-68
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2347   0.1648 -13.5562 0.0000 -2.5578 -1.9116
fluid_day1         -0.0893   0.0478  -1.8

## lv_hyper and fluid_day1*lv_mild only

In [57]:
fluid_fs = 'fluid_day1'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]
Xi = make_interactions(fluid_fs, echo_fs)

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_['fluid_day1*lv_mild'] = Xi['fluid_day1*lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.531864
         Iterations 8
                           fluid_day1:lv
Model:                Logit            Pseudo R-squared: 0.106     
Dependent Variable:   y                AIC:              3151.6572 
Date:                 2017-05-25 18:57 BIC:              3259.3407 
No. Observations:     2929             Log-Likelihood:   -1557.8   
Df Model:             17               LL-Null:          -1742.3   
Df Residuals:         2911             LLR p-value:      5.7116e-68
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2413   0.1651 -13.5792 0.0000 -2.5648 -1.9178
fluid_day1         -0.0729   0.0490  -1.4

# Fluid day 2

In [44]:
def normalize(x):
    x = x.copy()
    I = ~np.isnan(x)
    x[I] = (x[I] - x[I].mean())/x[I].std()
    return x

choose = (ef_.fb_day2_balance_ml > -8000) & (ef_.fb_day2_balance_ml < 8000)
print(sum(choose))
ef = ef_.loc[choose]

X = pd.DataFrame()

# build the features table
age = ef.age/np.timedelta64('1', 'Y') # time in years
age_masked = age < 0 # if age is less than 0, then the person is above 89 (90?)
age = normalize(age)
age[age_masked] = 0
X['age'] = age
X['age_over_90'] = age_masked.astype(float)

gender = ef.gender
gender = (gender == 'F').astype(float)
X['gender'] = gender

bmi = ef.bmi
bmi_missing = np.isnan(bmi)
bmi = normalize(bmi)
bmi[bmi_missing] = 0
X['bmi'] = bmi
X['bmi_missing'] = bmi_missing.astype(float)

mech_vent = ef.vf_first_day_vent.astype(float)
X['mech_vent'] = mech_vent

chf = ef.ex_congestive_heart_failure.astype(float)
X['CHF'] = chf

mdrd = ef.mdrd
mdrd_missing = np.isnan(mdrd)
mdrd = normalize(mdrd)
mdrd[mdrd_missing] = 0
X['mdrd'] = mdrd

# MDRD missing is already largely covered by BMI missing
# the only other missing values come from missing creatinine lab
# so add that
creatinine_missing = np.isnan(ef.lab_creatinine)
X['creatinine_missing'] = creatinine_missing.astype(float)

apsiii = ef.apsiii
#apsiii_missing = np.isnan(apsiii)
apsiii = normalize(apsiii)
#apsiii[apsiii_missing] = 0
X['apsiii'] = apsiii
#X['apsiii_missing'] = apsiii_missing.astype(float)

no_echo = np.isnan(ef.ea_key)
#X['no_echo'] = no_echo.astype(float)

# -3 cannot assess
# -2 ?depressed
# -1 hyper
# 0 normal
# 1 mildly depressed
# 2 moderately depressed
# 3 severe
lvsys = ef.ea_lv_systolic
lv_missing = (((np.isnan(lvsys)) | (lvsys < -2)) & ~no_echo).astype(float)
lv_hyperdynamic = (lvsys == -1).astype(float)
lv_normal = (lvsys == 0).astype(float)
lv_mild = (lvsys == 1).astype(float)
lv_depressed = ((lvsys == 2) | (lvsys == -2)).astype(float)
lv_sev_depressed = (lvsys == 3).astype(float)
X['lv_missing'] = lv_missing
X['lv_normal'] = lv_normal
X['lv_mild'] = lv_mild
X['lv_hyper'] = lv_hyperdynamic
X['lv_depressed'] = lv_depressed
X['lv_severe'] = lv_sev_depressed

# -3 cannot assess
# -2 PA systolic hypertension
# 0 normal
# 1 mild
# 2 moderate
# 3 severe
tvpulm = ef.ea_tv_pulm_htn
tv_missing = (((np.isnan(tvpulm)) | (tvpulm < -2)) & ~no_echo).astype(float)
tv_normal = (tvpulm == 0).astype(float)
tv_abnormal = ((tvpulm == -2) | (tvpulm > 0)).astype(float)
X['plm_htn_missing'] = tv_missing
X['plm_htn_normal'] = tv_normal
X['plm_htn_abnormal'] = tv_abnormal

# -3 cannot assess 
# -2 dilated
# -1 small
# 0 normal
# 1 mildly dilated
# 2 moderately dilated
rvcavity = ef.ea_rv_cavity
rvcav_missing = (((np.isnan(rvcavity) | (rvcavity < -2))) & ~no_echo).astype(float)
rvcav_normal = (rvcavity == 0).astype(float)
rvcav_small = (rvcavity == -1).astype(float)
rvcav_dilated = ((rvcavity == -2) | (rvcavity > 0)).astype(float)
X['rvcav_missing'] = rvcav_missing
X['rvcav_normal'] = rvcav_normal
X['rvcav_small'] = rvcav_small
X['rvcav_dilated'] = rvcav_dilated

lactate = ef.lab_lactate
lactate_missing = np.isnan(lactate)
lactate = normalize(lactate)
lactate[lactate_missing] = 0
X['lactate'] = lactate
X['lactate_missing'] = lactate_missing.astype(float)

## which ICU was treating the patient
sicu = ef.st_sicu.astype(float)
sicu[np.isnan(sicu)] = 0
nsicu = ef.st_nsicu.astype(float)
nsicu[np.isnan(nsicu)] = 0
micu = ef.st_micu.astype(float)
micu[np.isnan(micu)] = 0
#X['sicu'] = sicu
#X['nsicu'] = nsicu
X['micu'] = micu

## on vasopressors
on_vaso = ef.filter_vaso.astype(float)
X['on_vaso'] = on_vaso

## fluids
fluid_day1 = ef.fb_day1_balance_ml.values.copy()
f1_missing = np.isnan(fluid_day1)
fluid_day1[~f1_missing] = normalize(fluid_day1[~f1_missing])
fluid_day1[f1_missing] = 0
X['fluid_day1'] = fluid_day1
X['fluid_day1_missing'] = f1_missing.astype(float)

fluid_day2 = ef.fb_day2_balance_ml.values.copy()
f2_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2)
fluid_day2[~f2_missing] = normalize(fluid_day2[~f2_missing])
fluid_day2[f2_missing] = 0
X['fluid_day2'] = fluid_day2
X['fluid_day2_missing'] = f2_missing.astype(float)

fluid_day3 = ef.fb_day3_balance_ml.values.copy()
f3_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2) | np.isnan(fluid_day3)
fluid_day3[~f3_missing] = normalize(fluid_day3[~f3_missing])
fluid_day3[f3_missing] = 0
X['fluid_day3'] = fluid_day3
X['fluid_day3_missing'] = f3_missing.astype(float)

y = (ef.dod - ef.intime) < np.timedelta64(30, 'D')

echo_features = {
    'lv': ['lv_missing', 'lv_mild', 'lv_hyper', 'lv_depressed', 'lv_severe'], 
    'plm_htn': ['plm_htn_missing', 'plm_htn_abnormal'], 
    'rvcav': ['rvcav_missing', 'rvcav_small', 'rvcav_dilated'], 
}
echo_features['all'] = echo_features['lv'] + echo_features['plm_htn'] + echo_features['rvcav']


fluid_features = {
    'fluid_day1': ['fluid_day1'], 
    'fluid_day2': ['fluid_day2'], 
    'fluid_day3': ['fluid_day3'],
}
fluid_missing = {
    'fluid_day1': ['fluid_day1_missing'], 
    'fluid_day2': ['fluid_day2_missing'], 
    'fluid_day3': ['fluid_day3_missing'],
}

# Age, gender, BMI, BMI missing, mech vent, CHF, MDRD, Cr missing, apache score, lactate, 
#lactate missing, ICU type, pressors indicator variable
covariates = ['age', 'age_over_90', # age
    "gender", # gender
    "bmi", "bmi_missing", # bmi
    "mech_vent", # mech_vent
    "CHF", # CHF
    "mdrd", "creatinine_missing", # MDRD
    "apsiii", # apache score
    "lactate", "lactate_missing", # lactate
    "micu", #"sicu", "nsicu", "micu", # icu type
    "on_vaso", # pressors indicator variable
]

def make_data(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    return pd.concat([Xf, Xe, Xc], axis = 1)

def make_interactions(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    # Interactions: fluids x echo variables, fluids x CHF, echo x CHF
    # Fluids x echo variables
    Xfe = {}
    for cf in Xf.columns: 
        for ce in Xe.columns: 
            Xfe["{}*{}".format(cf, ce)] = Xf[cf]*Xe[ce]
    Xfe = pd.DataFrame(Xfe)

    # Fluids x CHF
    Xfc = {}
    for cf in Xf.columns:
        Xfc["{}*CHF".format(cf)] = Xf[cf] * Xc['CHF']
    Xfc = pd.DataFrame(Xfc)

    # Echo x CHF
    Xec = {}
    for ce in Xe.columns: 
        Xec["{}*CHF".format(ce)] = Xe[ce] * Xc['CHF']
    Xec = pd.DataFrame(Xec)

    return pd.concat([Xfe, Xfc, Xec], axis = 1)

X.shape, y.shape

2929


((2929, 33), (2929,))

## Covariates only

In [45]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

Xc = X[covariates]
X_ = Xc
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.532963
         Iterations 8
                           fluid_day2:lv
Model:                Logit            Pseudo R-squared: 0.104     
Dependent Variable:   y                AIC:              3152.0951 
Date:                 2017-05-25 18:53 BIC:              3241.8313 
No. Observations:     2929             Log-Likelihood:   -1561.0   
Df Model:             14               LL-Null:          -1742.3   
Df Residuals:         2914             LLR p-value:      9.9195e-69
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2513   0.1645 -13.6839 0.0000 -2.5737 -1.9288
age                 2.0221   0.2807   7.2

## All covariates and linear terms

In [46]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xe, Xc], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.531075
         Iterations 9
                           fluid_day2:lv
Model:                Logit            Pseudo R-squared: 0.107     
Dependent Variable:   y                AIC:              3153.0386 
Date:                 2017-05-25 18:53 BIC:              3278.6693 
No. Observations:     2929             Log-Likelihood:   -1555.5   
Df Model:             20               LL-Null:          -1742.3   
Df Residuals:         2908             LLR p-value:      6.1964e-67
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       9.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.3197   0.1683 -13.7821 0.0000 -2.6496 -1.9898
fluid_day2          0.0848   0.0479   1.7

## lv_hyper only

In [47]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_hyper'] = Xe['lv_hyper']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.531542
         Iterations 9
                           fluid_day2:lv
Model:                Logit            Pseudo R-squared: 0.106     
Dependent Variable:   y                AIC:              3147.7725 
Date:                 2017-05-25 18:53 BIC:              3249.4736 
No. Observations:     2929             Log-Likelihood:   -1556.9   
Df Model:             16               LL-Null:          -1742.3   
Df Residuals:         2912             LLR p-value:      4.7152e-69
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       9.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.3026   0.1665 -13.8294 0.0000 -2.6290 -1.9763
fluid_day2          0.0821   0.0478   1.7

## All interaction terms and covariates

In [48]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

X_ = make_data(fluid_fs, echo_fs)
X_ = pd.concat([X_, make_interactions(fluid_fs, echo_fs)], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.528174
         Iterations 8
                             fluid_day2:lv
Model:                 Logit              Pseudo R-squared:   0.112     
Dependent Variable:    y                  AIC:                3158.0405 
Date:                  2017-05-25 18:53   BIC:                3349.4778 
No. Observations:      2929               Log-Likelihood:     -1547.0   
Df Model:              31                 LL-Null:            -1742.3   
Df Residuals:          2897               LLR p-value:        8.3242e-64
Converged:             1.0000             Scale:              1.0000    
No. Iterations:        8.0000                                           
------------------------------------------------------------------------
                         Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
------------------------------------------------------------------------
const                   -2.3385   0.1707 -13.6988 0.

## lv_mild only

In [49]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.532319
         Iterations 9
                           fluid_day2:lv
Model:                Logit            Pseudo R-squared: 0.105     
Dependent Variable:   y                AIC:              3152.3252 
Date:                 2017-05-25 18:54 BIC:              3254.0262 
No. Observations:     2929             Log-Likelihood:   -1559.2   
Df Model:             16               LL-Null:          -1742.3   
Df Residuals:         2912             LLR p-value:      4.2146e-68
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       9.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2509   0.1653 -13.6198 0.0000 -2.5748 -1.9269
fluid_day2          0.0843   0.0478   1.7

## lv_hyper and fluid_day2*lv_mild only

In [50]:
fluid_fs = 'fluid_day2'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]
Xi = make_interactions(fluid_fs, echo_fs)

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_['fluid_day2*lv_mild'] = Xi['fluid_day2*lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.531141
         Iterations 9
                           fluid_day2:lv
Model:                Logit            Pseudo R-squared: 0.107     
Dependent Variable:   y                AIC:              3147.4249 
Date:                 2017-05-25 18:54 BIC:              3255.1083 
No. Observations:     2929             Log-Likelihood:   -1555.7   
Df Model:             17               LL-Null:          -1742.3   
Df Residuals:         2911             LLR p-value:      7.4932e-69
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       9.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2587   0.1656 -13.6381 0.0000 -2.5833 -1.9341
fluid_day2          0.1112   0.0490   2.2

# Fluid day 3

In [27]:
def normalize(x):
    x = x.copy()
    I = ~np.isnan(x)
    x[I] = (x[I] - x[I].mean())/x[I].std()
    return x

choose = (ef_.fb_day3_balance_ml > -8000) & (ef_.fb_day3_balance_ml < 8000)
print(sum(choose))
ef = ef_.loc[choose]

X = pd.DataFrame()

# build the features table
age = ef.age/np.timedelta64('1', 'Y') # time in years
age_masked = age < 0 # if age is less than 0, then the person is above 89 (90?)
age = normalize(age)
age[age_masked] = 0
X['age'] = age
X['age_over_90'] = age_masked.astype(float)

gender = ef.gender
gender = (gender == 'F').astype(float)
X['gender'] = gender

bmi = ef.bmi
bmi_missing = np.isnan(bmi)
bmi = normalize(bmi)
bmi[bmi_missing] = 0
X['bmi'] = bmi
X['bmi_missing'] = bmi_missing.astype(float)

mech_vent = ef.vf_first_day_vent.astype(float)
X['mech_vent'] = mech_vent

chf = ef.ex_congestive_heart_failure.astype(float)
X['CHF'] = chf

mdrd = ef.mdrd
mdrd_missing = np.isnan(mdrd)
mdrd = normalize(mdrd)
mdrd[mdrd_missing] = 0
X['mdrd'] = mdrd

# MDRD missing is already largely covered by BMI missing
# the only other missing values come from missing creatinine lab
# so add that
creatinine_missing = np.isnan(ef.lab_creatinine)
X['creatinine_missing'] = creatinine_missing.astype(float)

apsiii = ef.apsiii
#apsiii_missing = np.isnan(apsiii)
apsiii = normalize(apsiii)
#apsiii[apsiii_missing] = 0
X['apsiii'] = apsiii
#X['apsiii_missing'] = apsiii_missing.astype(float)

no_echo = np.isnan(ef.ea_key)
#X['no_echo'] = no_echo.astype(float)

# -3 cannot assess
# -2 ?depressed
# -1 hyper
# 0 normal
# 1 mildly depressed
# 2 moderately depressed
# 3 severe
lvsys = ef.ea_lv_systolic
lv_missing = (((np.isnan(lvsys)) | (lvsys < -2)) & ~no_echo).astype(float)
lv_hyperdynamic = (lvsys == -1).astype(float)
lv_normal = (lvsys == 0).astype(float)
lv_mild = (lvsys == 1).astype(float)
lv_depressed = ((lvsys == 2) | (lvsys == -2)).astype(float)
lv_sev_depressed = (lvsys == 3).astype(float)
X['lv_missing'] = lv_missing
X['lv_normal'] = lv_normal
X['lv_mild'] = lv_mild
X['lv_hyper'] = lv_hyperdynamic
X['lv_depressed'] = lv_depressed
X['lv_severe'] = lv_sev_depressed

# -3 cannot assess
# -2 PA systolic hypertension
# 0 normal
# 1 mild
# 2 moderate
# 3 severe
tvpulm = ef.ea_tv_pulm_htn
tv_missing = (((np.isnan(tvpulm)) | (tvpulm < -2)) & ~no_echo).astype(float)
tv_normal = (tvpulm == 0).astype(float)
tv_abnormal = ((tvpulm == -2) | (tvpulm > 0)).astype(float)
X['plm_htn_missing'] = tv_missing
X['plm_htn_normal'] = tv_normal
X['plm_htn_abnormal'] = tv_abnormal

# -3 cannot assess 
# -2 dilated
# -1 small
# 0 normal
# 1 mildly dilated
# 2 moderately dilated
rvcavity = ef.ea_rv_cavity
rvcav_missing = (((np.isnan(rvcavity) | (rvcavity < -2))) & ~no_echo).astype(float)
rvcav_normal = (rvcavity == 0).astype(float)
rvcav_small = (rvcavity == -1).astype(float)
rvcav_dilated = ((rvcavity == -2) | (rvcavity > 0)).astype(float)
X['rvcav_missing'] = rvcav_missing
X['rvcav_normal'] = rvcav_normal
X['rvcav_small'] = rvcav_small
X['rvcav_dilated'] = rvcav_dilated

lactate = ef.lab_lactate
lactate_missing = np.isnan(lactate)
lactate = normalize(lactate)
lactate[lactate_missing] = 0
X['lactate'] = lactate
X['lactate_missing'] = lactate_missing.astype(float)

## which ICU was treating the patient
sicu = ef.st_sicu.astype(float)
sicu[np.isnan(sicu)] = 0
nsicu = ef.st_nsicu.astype(float)
nsicu[np.isnan(nsicu)] = 0
micu = ef.st_micu.astype(float)
micu[np.isnan(micu)] = 0
#X['sicu'] = sicu
#X['nsicu'] = nsicu
X['micu'] = micu

## on vasopressors
on_vaso = ef.filter_vaso.astype(float)
X['on_vaso'] = on_vaso

## fluids
fluid_day1 = ef.fb_day1_balance_ml.values.copy()
f1_missing = np.isnan(fluid_day1)
fluid_day1[~f1_missing] = normalize(fluid_day1[~f1_missing])
fluid_day1[f1_missing] = 0
X['fluid_day1'] = fluid_day1
X['fluid_day1_missing'] = f1_missing.astype(float)

fluid_day2 = ef.fb_day2_balance_ml.values.copy()
f2_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2)
fluid_day2[~f2_missing] = normalize(fluid_day2[~f2_missing])
fluid_day2[f2_missing] = 0
X['fluid_day2'] = fluid_day2
X['fluid_day2_missing'] = f2_missing.astype(float)

fluid_day3 = ef.fb_day3_balance_ml.values.copy()
f3_missing = np.isnan(fluid_day1) | np.isnan(fluid_day2) | np.isnan(fluid_day3)
fluid_day3[~f3_missing] = normalize(fluid_day3[~f3_missing])
fluid_day3[f3_missing] = 0
X['fluid_day3'] = fluid_day3
X['fluid_day3_missing'] = f3_missing.astype(float)

y = (ef.dod - ef.intime) < np.timedelta64(30, 'D')

echo_features = {
    'lv': ['lv_missing', 'lv_mild', 'lv_hyper', 'lv_depressed', 'lv_severe'], 
    'plm_htn': ['plm_htn_missing', 'plm_htn_abnormal'], 
    'rvcav': ['rvcav_missing', 'rvcav_small', 'rvcav_dilated'], 
}
echo_features['all'] = echo_features['lv'] + echo_features['plm_htn'] + echo_features['rvcav']


fluid_features = {
    'fluid_day1': ['fluid_day1'], 
    'fluid_day2': ['fluid_day2'], 
    'fluid_day3': ['fluid_day3'],
}
fluid_missing = {
    'fluid_day1': ['fluid_day1_missing'], 
    'fluid_day2': ['fluid_day2_missing'], 
    'fluid_day3': ['fluid_day3_missing'],
}

# Age, gender, BMI, BMI missing, mech vent, CHF, MDRD, Cr missing, apache score, lactate, 
#lactate missing, ICU type, pressors indicator variable
covariates = ['age', 'age_over_90', # age
    "gender", # gender
    "bmi", "bmi_missing", # bmi
    "mech_vent", # mech_vent
    "CHF", # CHF
    "mdrd", "creatinine_missing", # MDRD
    "apsiii", # apache score
    "lactate", "lactate_missing", # lactate
    "micu", #"sicu", "nsicu", "micu", # icu type
    "on_vaso", # pressors indicator variable
]

def make_data(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    return pd.concat([Xf, Xe, Xc], axis = 1)

def make_interactions(fluid_fs, echo_fs): 
    
    Xf = X[fluid_features[fluid_fs]]
    Xe = X[echo_features[echo_fs]]
    Xc = X[covariates]

    # Interactions: fluids x echo variables, fluids x CHF, echo x CHF
    # Fluids x echo variables
    Xfe = {}
    for cf in Xf.columns: 
        for ce in Xe.columns: 
            Xfe["{}*{}".format(cf, ce)] = Xf[cf]*Xe[ce]
    Xfe = pd.DataFrame(Xfe)

    # Fluids x CHF
    Xfc = {}
    for cf in Xf.columns:
        Xfc["{}*CHF".format(cf)] = Xf[cf] * Xc['CHF']
    Xfc = pd.DataFrame(Xfc)

    # Echo x CHF
    Xec = {}
    for ce in Xe.columns: 
        Xec["{}*CHF".format(ce)] = Xe[ce] * Xc['CHF']
    Xec = pd.DataFrame(Xec)

    return pd.concat([Xfe, Xfc, Xec], axis = 1)

X.shape, y.shape

2379


((2379, 33), (2379,))

## Covariates only

In [28]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

Xc = X[covariates]
X_ = Xc
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.550361
         Iterations 9
                           fluid_day3:lv
Model:                Logit            Pseudo R-squared: 0.081     
Dependent Variable:   y                AIC:              2648.6166 
Date:                 2017-05-25 18:47 BIC:              2735.2331 
No. Observations:     2379             Log-Likelihood:   -1309.3   
Df Model:             14               LL-Null:          -1425.4   
Df Residuals:         2364             LLR p-value:      1.4051e-41
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       9.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.1415   0.1775 -12.0646 0.0000 -2.4894 -1.7936
age                 1.7421   0.2906   5.9

## All covariates and linear terms

In [29]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xe, Xc], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.545469
         Iterations 8
                           fluid_day3:lv
Model:                Logit            Pseudo R-squared: 0.090     
Dependent Variable:   y                AIC:              2637.3410 
Date:                 2017-05-25 18:48 BIC:              2758.6042 
No. Observations:     2379             Log-Likelihood:   -1297.7   
Df Model:             20               LL-Null:          -1425.4   
Df Residuals:         2358             LLR p-value:      9.2742e-43
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2091   0.1837 -12.0278 0.0000 -2.5691 -1.8491
fluid_day3          0.2148   0.0519   4.1

## lv_hyper only

In [30]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_hyper'] = Xe['lv_hyper']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.546084
         Iterations 8
                           fluid_day3:lv
Model:                Logit            Pseudo R-squared: 0.089     
Dependent Variable:   y                AIC:              2632.2699 
Date:                 2017-05-25 18:48 BIC:              2730.4353 
No. Observations:     2379             Log-Likelihood:   -1299.1   
Df Model:             16               LL-Null:          -1425.4   
Df Residuals:         2362             LLR p-value:      1.6079e-44
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.2091   0.1819 -12.1432 0.0000 -2.5657 -1.8525
fluid_day3          0.2121   0.0518   4.0

## All interaction terms and covariates

In [32]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

X_ = make_data(fluid_fs, echo_fs)
X_ = pd.concat([X_, make_interactions(fluid_fs, echo_fs)], axis = 1)
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.543431
         Iterations 8
                             fluid_day3:lv
Model:                 Logit              Pseudo R-squared:   0.093     
Dependent Variable:    y                  AIC:                2649.6439 
Date:                  2017-05-25 18:51   BIC:                2834.4259 
No. Observations:      2379               Log-Likelihood:     -1292.8   
Df Model:              31                 LL-Null:            -1425.4   
Df Residuals:          2347               LLR p-value:        5.4157e-39
Converged:             1.0000             Scale:              1.0000    
No. Iterations:        8.0000                                           
------------------------------------------------------------------------
                         Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
------------------------------------------------------------------------
const                   -2.2163   0.1864 -11.8924 0.

## lv_mild only

In [35]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.546414
         Iterations 8
                           fluid_day3:lv
Model:                Logit            Pseudo R-squared: 0.088     
Dependent Variable:   y                AIC:              2633.8377 
Date:                 2017-05-25 18:52 BIC:              2732.0031 
No. Observations:     2379             Log-Likelihood:   -1299.9   
Df Model:             16               LL-Null:          -1425.4   
Df Residuals:         2362             LLR p-value:      3.3721e-44
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.1562   0.1807 -11.9333 0.0000 -2.5103 -1.8020
fluid_day3          0.2152   0.0519   4.1

## lv_mild + lv_mild * fluid

In [34]:
fluid_fs = 'fluid_day3'
echo_fs = 'lv'

Xc = X[covariates]
Xe = X[echo_features[echo_fs]]
Xf = X[fluid_features[fluid_fs]]
Xi = make_interactions(fluid_fs, echo_fs)

X_ = pd.concat([Xf, Xc], axis = 1)
X_['lv_mild'] = Xe['lv_mild']
X_['fluid_day3*lv_mild'] = Xi['fluid_day3*lv_mild']
X_ = sm.add_constant(X_)

logit = sm.Logit(y, X_)
res = logit.fit()

print(res.summary2(title = '{}:{}'.format(fluid_fs, echo_fs)))

Optimization terminated successfully.
         Current function value: 0.545261
         Iterations 8
                           fluid_day3:lv
Model:                Logit            Pseudo R-squared: 0.090     
Dependent Variable:   y                AIC:              2630.3535 
Date:                 2017-05-25 18:52 BIC:              2734.2934 
No. Observations:     2379             Log-Likelihood:   -1297.2   
Df Model:             17               LL-Null:          -1425.4   
Df Residuals:         2361             LLR p-value:      1.0308e-44
Converged:            1.0000           Scale:            1.0000    
No. Iterations:       8.0000                                       
-------------------------------------------------------------------
                    Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------
const              -2.1625   0.1810 -11.9508 0.0000 -2.5172 -1.8079
fluid_day3          0.2431   0.0533   4.5