In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [5]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [6]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [7]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [8]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'heart_failure', 'sleep_apnoea', 'chronic_pulmonary_disease',
       'epilepsy', 'antipsychotics_100_pdds', 'antidepressants_100_pdds',
       'non_insomnia_GP_consultations', 'insomnia_any', 'insomnia_count:0',
       'insomnia_count:1_5', 'insomnia_count:above_5',
       'non_insomnia_GP_consultations:0', 'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_index_date:90-99', 'age_at_index_date:above_99',
       'antidepressant_pdds:00000', 'antidepressant_pdds:00001_10',
       'a

In [9]:
pt_features.loc['intercept'] = 1.0

In [10]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,heart_failure,sleep_apnoea,chronic_pulmonary_disease,epilepsy,antipsychotics_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,mental_illness,mood_stabilisers_and_AEDs_100_pdds,lithium_100_pdds,z_drugs_100_pdds,benzodiazepines_100_pdds,benzodiazepine_pdds:00000,benzodiazepine_pdds:00001_10,benzodiazepine_pdds:00011_100,benzodiazepine_pdds:00101_1000,benzodiazepine_pdds:01001_10000,benzodiazepine_pdds:10000_and_above,z_drug_pdds:00000,z_drug_pdds:00001_10,z_drug_pdds:00011_100,z_drug_pdds:00101_1000,z_drug_pdds:01001_10000,z_drug_pdds:10000_and_above,mood_stabilisers_and_AED_pdds:00000,mood_stabilisers_and_AED_pdds:00001_10,mood_stabilisers_and_AED_pdds:00011_100,mood_stabilisers_and_AED_pdds:00101_1000,mood_stabilisers_and_AED_pdds:01001_10000,mood_stabilisers_and_AED_pdds:10000_and_above,lithium_pdds:00000,lithium_pdds:00001_10,lithium_pdds:00011_100,lithium_pdds:00101_1000,lithium_pdds:01001_10000,lithium_pdds:10000_and_above
10969,1534235.0,40.0,235.0,1.0,2009-02-06 00:00:00,1.0,8195.0,2011-05-06 00:00:00,1997-08-13 00:00:00,35819.0,,69.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090499,0.0,150.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.944423,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
5097,5388628.0,32.0,628.0,1.0,2009-01-01 00:00:00,0.0,,2011-09-06 00:00:00,1998-11-15 00:00:00,52224.0,,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,140.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
11046,3450416.0,17.0,416.0,1.0,2006-01-05 00:00:00,0.0,,2008-10-30 00:00:00,1995-01-31 00:00:00,28355.0,,89.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,109.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1447,1725453.0,24.0,453.0,0.0,2009-08-06 00:00:00,1.0,1917.0,2010-10-26 00:00:00,1998-10-03 00:00:00,67051.0,,85.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,86.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1964,4122205.0,15.0,205.0,1.0,2010-01-14 00:00:00,0.0,,2012-02-14 00:00:00,1999-12-13 00:00:00,17494.0,,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [11]:
columns_always_for_inclusion = [
    'age_at_index_date',
    'female',
#      'non_insomnia_GP_consultations', #removed as VIF 4.3 
     'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
    'epilepsy',
    'mood_stabilisers_and_AEDs_100_pdds',
    'antidepressants_100_pdds', 
    'antipsychotics_100_pdds',
    'z_drugs_100_pdds',
    'benzodiazepines_100_pdds',
    'lithium_100_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [12]:
cols_for_inclusion_here = ['insomnia_any']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_any  being retained as mean > 0
mental_illness  being retained as mean > 0
mood_stabilisers_and_AEDs_100_pdds  being retained as mean > 0
lithium_100_pdds  being retained as mean > 0
z_drugs_100_pdds  being retained as mean > 0
benzodiazepines_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                                    Univariate OR    [0.025    0.975]  \
age_at_index_date                        1.000000  0.999728  1.000272   
antidepressants_100_pdds                 1.030238  1.024300  1.036210   
antipsychotics_100_pdds                  1.022390  1.010115  1.03

In [13]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996758,0.996236,0.997281,6.710122e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013228,1.007631,1.018856,3.324122e-06
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.395503,1.316759,1.478955,2.429633e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.538843,1.264309,1.872988,1.714463e-05
female,1.000099,0.972834,1.028129,0.9943737,0.947812,0.902539,0.995356,0.03184281
insomnia_any,1.584746,1.442463,1.741063,8.572512e-22,1.314774,1.189435,1.45332,8.613497e-08
lithium_100_pdds,1.080637,1.040209,1.122635,6.70769e-05,1.044329,1.007865,1.082111,0.01675583
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.752332,1.664218,1.845112,9.164373e-101
stroke,1.484821,1.376485,1.601685,1.511566e-24,1.451734,1.338778,1.57422,1.8889119999999998e-19
z_drugs_100_pdds,1.076342,1.049561,1.103807,1.048587e-08,1.047146,1.022259,1.072639,0.0001741324


In [14]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.40, (1.32, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.54, (1.26, 1.87), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 1.00), P=0.032"
Insomnia any,"1.58, (1.44, 1.74), P=0.000","1.31, (1.19, 1.45), P=0.000"
Lithium (100 PDDs),"1.08, (1.04, 1.12), P=0.000","1.04, (1.01, 1.08), P=0.017"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.75, (1.66, 1.85), P=0.000"
Stroke,"1.48, (1.38, 1.60), P=0.000","1.45, (1.34, 1.57), P=0.000"
Z drugs (100 PDDs),"1.08, (1.05, 1.10), P=0.000","1.05, (1.02, 1.07), P=0.000"


### insomnia tertiles

In [15]:
cols_for_inclusion_here = ['insomnia_count:1_5','insomnia_count:above_5']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0
mental_illness  being retained as mean > 0
mood_stabilisers_and_AEDs_100_pdds  being retained as mean > 0
lithium_100_pdds  being retained as mean > 0
z_drugs_100_pdds  being retained as mean > 0
benzodiazepines_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                                    Univariate OR    [0.025    0.975]  \
age_at_index_date                        1.000000  0.999728  1.000272   
antidepressants_100_pdds                 1.030238  1.024300  1.036210   
antipsyc

In [16]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996762,0.996239,0.997285,7.917525e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013226,1.007629,1.018854,3.337935e-06
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.394944,1.316226,1.47837,2.8620850000000003e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.538723,1.264184,1.872883,1.724101e-05
female,1.000099,0.972834,1.028129,0.9943737,0.947778,0.902505,0.995322,0.03173599
insomnia_count:1_5,1.551674,1.409792,1.707835,2.7154129999999996e-19,1.295012,1.16958,1.433896,6.571321e-07
insomnia_count:above_5,2.714286,1.64577,4.476535,9.166635e-05,1.947885,1.169277,3.244958,0.01045087
lithium_100_pdds,1.080637,1.040209,1.122635,6.70769e-05,1.044286,1.007838,1.082052,0.01681596
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.751503,1.663424,1.844246,1.3909669999999998e-100
stroke,1.484821,1.376485,1.601685,1.511566e-24,1.451557,1.338609,1.574036,1.9499439999999998e-19


In [17]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39, (1.32, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.54, (1.26, 1.87), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 1.00), P=0.032"
Insomnia count:1 5,"1.55, (1.41, 1.71), P=0.000","1.30, (1.17, 1.43), P=0.000"
Insomnia count:above 5,"2.71, (1.65, 4.48), P=0.000","1.95, (1.17, 3.24), P=0.010"
Lithium (100 PDDs),"1.08, (1.04, 1.12), P=0.000","1.04, (1.01, 1.08), P=0.017"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.75, (1.66, 1.84), P=0.000"
Stroke,"1.48, (1.38, 1.60), P=0.000","1.45, (1.34, 1.57), P=0.000"
