In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell runs, so edits to module source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

# Absolute path of the working directory's parent, appended to sys.path once.
# Presumably this lets the project packages imported below be found - confirm
# against the local directory layout.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.common import codelists
from demres.common.helper_functions import *
from demins.functions import *

In [4]:
# Remove pandas' cap on displayed columns so wide frames render in full.
pd.options.display.max_columns = None

In [7]:
# Load the processed patient feature table. The path is relative to the
# working directory set by the `cd` cell above.
# `infer_datetime_format` was dropped: it is deprecated in pandas 2.x (emits a
# warning) and is not needed to parse the three date columns.
pt_features = pd.read_csv(
    'data/pt_data/processed_data/pt_features_demins.csv',
    delimiter=',',
    parse_dates=['index_date', 'data_end', 'data_start'],
)

In [12]:
# Add a constant column of 1.0 (mutates pt_features in place; safe to re-run).
# 'intercept' is not in the exclusion lists used to build
# training_cols_window0 / training_cols_window1 below, so it is handed to the
# model helper together with the predictors - presumably as the logit's
# constant term (TODO confirm in get_univariate_and_multivariate_results).
pt_features['intercept'] = 1.0

In [58]:
# Spot-check a few rows. random_state pins the draw so re-running the
# notebook shows the same rows instead of a fresh random sample each time.
# NOTE(review): the rendered rows are patient-level records (patid, yob,
# index_date, ...) - consider clearing this output before sharing the notebook.
pt_features.sample(3, random_state=0)

Unnamed: 0,patid,gender,yob,pracid,index_date,isCase,final dementia medcode,data_start,data_end,matchid,stroke_window0,stroke_window1,intellectual_disability,CHD_heart_failure_and_peripheral_vascular_disease_window0,CHD_heart_failure_and_peripheral_vascular_disease_window1,hypertension_window0,hypertension_window1,diabetes_window0,diabetes_window1,clin_sig_alcohol_use_window0,clin_sig_alcohol_use_window1,mental_illness_non_smi_window0,mental_illness_non_smi_window1,mental_illness_smi,sleep_apnoea_window0,sleep_apnoea_window1,current_smoker_window0,current_smoker_window1,asthma_window0,asthma_window1,COPD,CKD_window0,CKD_window1,insomnia_window0,insomnia_window1,mood_stabilisers_pdds_window0,mood_stabilisers_pdds_window1,sedatives_pdds_window0,sedatives_pdds_window1,fgas_pdds_window0,fgas_pdds_window1,sgas_pdds_window0,sgas_pdds_window1,sga_depots_pdds_window0,sga_depots_pdds_window1,fga_depots_pdds_window0,fga_depots_pdds_window1,antidepressants_pdds_window0,antidepressants_pdds_window1,consultation_count_window0,consultation_count_window1,intercept
2640,916013,1,23,13,2006-01-19,0,0.0,1995-07-28,2007-12-13,2336.0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1856,1170,0,0,0,0,0,0,0,0,0,0,173,159,1
3659,5772115,1,20,115,2007-05-10,0,0.0,1996-03-03,2008-12-02,16271.0,0,0,0,4,3,12,6,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,105,106,1
3827,16393628,1,24,628,2009-11-26,1,8195.0,1998-11-15,2011-09-07,87645.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,124,160,1


## Window 0 (exposures measured 5-10 years before index date)

In [38]:
# Columns passed to the window-0 models: every column of pt_features except
# the bookkeeping fields listed here (ids, dates, yob, the isCase outcome, ...)
# and anything whose name contains 'window1'.
non_predictor_cols = {
    'patid', 'index_date', 'matchid', 'data_start', 'data_end',
    'final dementia medcode', 'isCase', 'yob', 'pracid',
}
training_cols_window0 = [
    col
    for col in pt_features.columns
    if col not in non_predictor_cols and 'window1' not in col
]
# Move insomnia_window0 to the front of the list.
training_cols_window0.remove('insomnia_window0')
training_cols_window0.insert(0, 'insomnia_window0')

In [39]:
# Run the project helper on the window-0 predictor list. It returns three
# objects: the univariate table, the multivariate table and a model summary.
# The result is also kept under a window-specific name because the window-1
# cell further down rebinds the three shared names; without it the window-0
# results cannot be shown again once that cell has run.
results_window0 = get_univariate_and_multivariate_results(pt_features, training_cols_window0)
univariate_results, multivariate_results, multivariate_summary = results_window0

Optimization terminated successfully.
         Current function value: 0.686532
         Iterations 7


### Univariate results

In [40]:
# Display the univariate table (odds_ratio and p_value per predictor).
# This shows whatever `univariate_results` currently holds; the name is shared
# with the window-1 section, so run this right after the window-0 fit cell.
univariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_window0,1.057,0.002
gender,1.0,1.0
stroke_window0,1.0928,0.0
CHD_heart_failure_and_peripheral_vascular_disease_window0,1.0131,0.185
hypertension_window0,0.9934,0.379
diabetes_window0,1.0368,0.016
mental_illness_non_smi_window0,1.0809,0.0
mental_illness_smi,1.1386,0.0
current_smoker_window0,1.0179,0.207
asthma_window0,0.9996,0.984


### Multivariate results

In [41]:
# Display the multivariate table (odds_ratio and p_value per predictor).
# This shows whatever `multivariate_results` currently holds; the name is
# shared with the window-1 section, so run this right after the window-0 fit.
multivariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_window0,1.0214,0.238
gender,0.9682,0.293
stroke_window0,1.0637,0.014
CHD_heart_failure_and_peripheral_vascular_disease_window0,0.9904,0.363
hypertension_window0,0.9818,0.026
diabetes_window0,1.0132,0.394
mental_illness_non_smi_window0,1.0509,0.0
mental_illness_smi,1.093,0.0
current_smoker_window0,1.0031,0.836
asthma_window0,0.9926,0.759


In [56]:
# Display the header table of the fitted model's summary.
# NOTE(review): the output recorded below does not belong to the window-0 fit.
# It reports Log-Likelihood -12678 over 18876 observations, which matches the
# window-1 fit log (0.671621 * 18876 ~= 12678) and is identical to the table in
# the window-1 section. The window-0 fit log (0.686532 * 18876 ~= 12959) would
# give about -12959. `multivariate_summary` is shared by both sections, so this
# cell was most likely executed after a window-1 fit; re-run it directly after
# the window-0 fit cell to refresh the output.
multivariate_summary.tables[0]

0,1,2,3
Dep. Variable:,isCase,No. Observations:,18876.0
Model:,Logit,Df Residuals:,18856.0
Method:,MLE,Df Model:,19.0
Date:,"Wed, 03 May 2017",Pseudo R-squ.:,0.03106
Time:,17:44:01,Log-Likelihood:,-12678.0
converged:,True,LL-Null:,-13084.0
,,LLR p-value:,4.362e-160


## Window 1 (exposures measured 0-5 years before index date)

In [59]:
# Columns passed to the window-1 models: every column of pt_features except
# the bookkeeping fields listed here (ids, dates, yob, the isCase outcome, ...)
# and anything whose name contains 'window0'.
bookkeeping_cols = {
    'patid', 'index_date', 'matchid', 'data_start', 'data_end',
    'final dementia medcode', 'isCase', 'yob', 'pracid',
}
training_cols_window1 = [
    name
    for name in pt_features.columns
    if name not in bookkeeping_cols and 'window0' not in name
]
# Move insomnia_window1 to the front of the list.
training_cols_window1.remove('insomnia_window1')
training_cols_window1.insert(0, 'insomnia_window1')

In [60]:
# Run the project helper on the window-1 predictor list. It returns three
# objects: the univariate table, the multivariate table and a model summary.
# This rebinds the same three names the window-0 section uses, so the result
# is also kept under a window-specific name that a later re-run of the
# window-0 fit cell will not overwrite.
results_window1 = get_univariate_and_multivariate_results(pt_features, training_cols_window1)
univariate_results, multivariate_results, multivariate_summary = results_window1

Optimization terminated successfully.
         Current function value: 0.671621
         Iterations 6


In [61]:
# Display the univariate table (odds_ratio and p_value per predictor).
# This shows whatever `univariate_results` currently holds; the name is shared
# with the window-0 section, so run this right after the window-1 fit cell.
univariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_window1,1.0435,0.004
gender,1.0,1.0
stroke_window1,1.2524,0.0
CHD_heart_failure_and_peripheral_vascular_disease_window1,0.9791,0.125
hypertension_window1,0.9908,0.324
diabetes_window1,1.0366,0.013
mental_illness_non_smi_window1,1.2008,0.0
mental_illness_smi,1.1386,0.0
current_smoker_window1,1.0019,0.824
asthma_window1,1.0022,0.94


In [62]:
# Display the multivariate table (odds_ratio and p_value per predictor).
# This shows whatever `multivariate_results` currently holds; the name is
# shared with the window-0 section, so run this right after the window-1 fit.
multivariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_window1,0.993,0.631
gender,0.9596,0.185
stroke_window1,1.2049,0.0
CHD_heart_failure_and_peripheral_vascular_disease_window1,0.9209,0.0
hypertension_window1,0.9842,0.121
diabetes_window1,1.0035,0.813
mental_illness_non_smi_window1,1.1771,0.0
mental_illness_smi,1.0454,0.006
current_smoker_window1,0.9917,0.374
asthma_window1,0.9915,0.804


In [63]:
# Display the header table of the fitted model's summary.
# The recorded output below is consistent with the window-1 fit log above:
# 0.671621 * 18876 observations ~= 12678, matching Log-Likelihood -12678.
# `multivariate_summary` is shared with the window-0 section, so run this
# directly after the window-1 fit cell.
multivariate_summary.tables[0]

0,1,2,3
Dep. Variable:,isCase,No. Observations:,18876.0
Model:,Logit,Df Residuals:,18856.0
Method:,MLE,Df Model:,19.0
Date:,"Wed, 03 May 2017",Pseudo R-squ.:,0.03106
Time:,17:46:43,Log-Likelihood:,-12678.0
converged:,True,LL-Null:,-13084.0
,,LLR p-value:,4.362e-160
