In [35]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
cd ~/demres

/Users/zurfarosa/demres


In [37]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [80]:
!pip3 list

appdirs (1.4.2)
appnope (0.1.0)
bleach (1.5.0)
decorator (4.0.11)
entrypoints (0.2.2)
graphviz (0.6)
html5lib (0.9999999)
ipykernel (4.5.2)
ipython (5.3.0)
ipython-genutils (0.1.0)
ipywidgets (5.2.2)
Jinja2 (2.9.5)
jsonschema (2.6.0)
jupyter (1.0.0)
jupyter-client (5.0.0)
jupyter-console (5.1.0)
jupyter-core (4.3.0)
MarkupSafe (0.23)
mistune (0.7.3)
nbconvert (5.1.1)
nbformat (4.3.0)
notebook (4.4.1)
numexpr (2.6.2)
numpy (1.12.0)
packaging (16.8)
pandas (0.19.2)
pandocfilters (1.4.1)
patsy (0.4.1)
pexpect (4.2.1)
pickleshare (0.7.4)
pip (9.0.1)
prompt-toolkit (1.0.13)
ptyprocess (0.5.1)
Pygments (2.2.0)
pyparsing (2.1.10)
python-dateutil (2.6.0)
pytz (2016.10)
pyzmq (16.0.2)
qtconsole (4.2.1)
scipy (0.19.1)
setuptools (34.3.2)
simplegeneric (0.8.1)
six (1.10.0)
statsmodels (0.8.0)
tables (3.3.0)
terminado (0.6)
testpath (0.3)
tornado (4.4.2)
traitlets (4.3.2)
wcwidth (0.1.7)
wheel (0.24.0)
widgetsnbextension (1.2.6)


In [38]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [39]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [40]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [41]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [42]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'non_stroke_vascular_disease', 'hypertension', 'diabetes',
       'mental_illness_non_smi', 'mental_illness_smi', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy',
       'non_insomnia_GP_consultations', 'benzo_and_z_drugs_any',
       'insomnia_any', 'insomnia_count:0', 'insomnia_count:1_5',
       'insomnia_count:above_5', 'non_insomnia_GP_consultations:0',
       'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_index_date:90-99', 'age_at_index_date:above_99',
   

In [43]:
pt_features.loc['intercept'] = 1.0

In [44]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,non_stroke_vascular_disease,hypertension,diabetes,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,chronic_pulmonary_disease,epilepsy,non_insomnia_GP_consultations,benzo_and_z_drugs_any,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,depot_antipsychotic_pdds:00000,depot_antipsychotic_pdds:00001_10,depot_antipsychotic_pdds:00011_100,depot_antipsychotic_pdds:00101_1000,depot_antipsychotic_pdds:01001_10000,depot_antipsychotic_pdds:10000_and_above,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,benzo_and_z_drug_pdds:00000,benzo_and_z_drug_pdds:00001_10,benzo_and_z_drug_pdds:00011_100,benzo_and_z_drug_pdds:00101_1000,benzo_and_z_drug_pdds:01001_10000,benzo_and_z_drug_pdds:10000_and_above,mood_stabiliser_pdds:00000,mood_stabiliser_pdds:00001_10,mood_stabiliser_pdds:00011_100,mood_stabiliser_pdds:00101_1000,mood_stabiliser_pdds:01001_10000,mood_stabiliser_pdds:10000_and_above,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,other_sedatives_100_pdds,antipsychotics_100_pdds,depot_antipsychotics_100_pdds,antidepressants_100_pdds
8922,9391021.0,29.0,21.0,0.0,2008-10-09 00:00:00,0.0,,2012-02-24 00:00:00,1998-09-18 00:00:00,54879.0,,79.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.227073,0.0,0.0,0.0
3459,6738344.0,30.0,344.0,1.0,2006-06-24 00:00:00,1.0,6578.0,2006-07-19 00:00:00,1994-01-31 00:00:00,51983.0,,76.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,63.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6291,11505155.0,19.0,155.0,0.0,2011-04-28 00:00:00,1.0,1916.0,2011-11-07 00:00:00,1998-10-15 00:00:00,22923.0,,92.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,107.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.129284,0.0,0.0
18141,4081294.0,27.0,294.0,1.0,2008-05-07 00:00:00,0.0,,2013-05-07 00:00:00,1998-01-31 00:00:00,11446.0,,81.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,142.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.859542
29199,5790391.0,21.0,391.0,1.0,2005-06-01 00:00:00,1.0,7664.0,2013-04-23 00:00:00,1993-01-31 00:00:00,57921.0,,84.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,108.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
columns_always_for_inclusion = [
    'age_at_index_date',
    'female',
#      'non_insomnia_GP_consultations', removed as VIF 4.3 (more importantly, age at index date is 5.7 in its presence)
     'stroke',
    'non_stroke_vascular_disease',
    'hypertension',
    'diabetes', 
    'mental_illness_non_smi', 
    'mental_illness_smi',
    'sleep_apnoea', 'chronic_pulmonary_disease', 'epilepsy',
    'mood_stabilisers_100_pdds',
    'other_sedatives_100_pdds', 'antidepressants_100_pdds', 
    'depot_antipsychotics_100_pdds', 'antipsychotics_100_pdds',
    'benzo_and_z_drugs_100_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [71]:
cols_for_inclusion_here = ['insomnia_any','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
non_stroke_vascular_disease  being retained as mean > 0
hypertension  being retained as mean > 0
diabetes  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
insomnia_any  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR    [0.025    0.975]  \
age_at_index_date                   1.000000  0.999728 

In [72]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.99483,0.994371,0.995289,1.423492e-107
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013071,1.007499,1.018675,3.941123e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.00917,1.003972,1.014396,0.0005319407
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.370186,1.292539,1.452497,3.645284e-26
diabetes,1.386815,1.289873,1.491043,9.193739999999999e-19,1.339094,1.238185,1.448228,2.780149e-13
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.459261,1.18752,1.793184,0.0003247975
hypertension,1.081301,1.045818,1.117987,4.398096e-06,1.085365,1.035664,1.13745,0.0006142792
insomnia_any,1.584746,1.442463,1.741063,8.572512e-22,1.309812,1.184818,1.447993,1.33418e-07
mental_illness_non_smi,1.581271,1.516243,1.649088,1.765649e-101,1.712652,1.62591,1.804021,1.597615e-91
mood_stabilisers_100_pdds,1.031925,1.019629,1.044368,2.769987e-07,1.013685,1.001752,1.02576,0.02446469


In [73]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","0.99 (0.99, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01 (1.00, 1.01), P=0.001"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.37 (1.29, 1.45), P=0.000"
Diabetes,"1.39, (1.29, 1.49), P=0.000","1.34 (1.24, 1.45), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.46 (1.19, 1.79), P=0.000"
Hypertension,"1.08, (1.05, 1.12), P=0.000","1.09 (1.04, 1.14), P=0.001"
Insomnia any,"1.58, (1.44, 1.74), P=0.000","1.31 (1.18, 1.45), P=0.000"
Mental illness non smi,"1.58, (1.52, 1.65), P=0.000","1.71 (1.63, 1.80), P=0.000"
Mood stabilisers (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.00, 1.03), P=0.024"


### insomnia (continuous variable)

In [74]:
# cols_for_inclusion_here = ['insomnia','benzo_and_z_drugs_100_pdds']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [75]:
# univariate_and_multivariate_results

### insomnia quantiles

In [76]:
cols_for_inclusion_here = ['insomnia_count:1_5','insomnia_count:above_5','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
non_stroke_vascular_disease  being retained as mean > 0
hypertension  being retained as mean > 0
diabetes  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR    [0.025    0.975]  

In [77]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.994835,0.994377,0.995294,2.648174e-107
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013077,1.007504,1.01868,3.904092e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.009061,1.003863,1.014286,0.0006195008
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.36976,1.292131,1.452052,4.108002e-26
diabetes,1.386815,1.289873,1.491043,9.193739999999999e-19,1.338785,1.237892,1.447901,2.912771e-13
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.460173,1.188252,1.794321,0.0003176541
hypertension,1.081301,1.045818,1.117987,4.398096e-06,1.084934,1.035248,1.137004,0.0006536363
insomnia_count:1_5,1.551674,1.409792,1.707835,2.7154129999999996e-19,1.290981,1.165778,1.42963,9.24816e-07
insomnia_count:above_5,2.714286,1.64577,4.476535,9.166635e-05,1.902705,1.141629,3.17116,0.01358005
mental_illness_non_smi,1.581271,1.516243,1.649088,1.765649e-101,1.711975,1.625262,1.803315,2.2210879999999999e-91


In [78]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","0.99 (0.99, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01 (1.00, 1.01), P=0.001"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.37 (1.29, 1.45), P=0.000"
Diabetes,"1.39, (1.29, 1.49), P=0.000","1.34 (1.24, 1.45), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.46 (1.19, 1.79), P=0.000"
Hypertension,"1.08, (1.05, 1.12), P=0.000","1.08 (1.04, 1.14), P=0.001"
Insomnia count:1 5,"1.55, (1.41, 1.71), P=0.000","1.29 (1.17, 1.43), P=0.000"
Insomnia count:above 5,"2.71, (1.65, 4.48), P=0.000","1.90 (1.14, 3.17), P=0.014"
Mental illness non smi,"1.58, (1.52, 1.65), P=0.000","1.71 (1.63, 1.80), P=0.000"
