In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [5]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [6]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [7]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [8]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'heart_failure', 'sleep_apnoea', 'chronic_pulmonary_disease',
       'epilepsy', 'other_sedatives_100_pdds', 'antipsychotics_100_pdds',
       'antidepressants_100_pdds', 'non_insomnia_GP_consultations',
       'insomnia_any', 'insomnia_count:0', 'insomnia_count:1_5',
       'insomnia_count:above_5', 'non_insomnia_GP_consultations:0',
       'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_index_date:90-99', 'age_at_index_date:above_99',
       'antidepressant_pdds:00000', 'antid

In [9]:
pt_features.loc['intercept'] = 1.0

In [10]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,heart_failure,sleep_apnoea,chronic_pulmonary_disease,epilepsy,other_sedatives_100_pdds,antipsychotics_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,mental_illness,mood_stabilisers_and_AEDs_100_pdds,lithium_100_pdds,z_drugs_100_pdds,benzodiazepines_100_pdds,benzodiazepine_pdds:00000,benzodiazepine_pdds:00001_10,benzodiazepine_pdds:00011_100,benzodiazepine_pdds:00101_1000,benzodiazepine_pdds:01001_10000,benzodiazepine_pdds:10000_and_above,z_drug_pdds:00000,z_drug_pdds:00001_10,z_drug_pdds:00011_100,z_drug_pdds:00101_1000,z_drug_pdds:01001_10000,z_drug_pdds:10000_and_above,mood_stabilisers_and_AED_pdds:00000,mood_stabilisers_and_AED_pdds:00001_10,mood_stabilisers_and_AED_pdds:00011_100,mood_stabilisers_and_AED_pdds:00101_1000,mood_stabilisers_and_AED_pdds:01001_10000,mood_stabilisers_and_AED_pdds:10000_and_above,lithium_pdds:00000,lithium_pdds:00001_10,lithium_pdds:00011_100,lithium_pdds:00101_1000,lithium_pdds:01001_10000,lithium_pdds:10000_and_above
6979,18218319.0,19.0,319.0,0.0,2006-08-15 00:00:00,0.0,,2008-12-10 00:00:00,1995-10-06 00:00:00,40391.0,,87.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
16854,4027124.0,25.0,124.0,0.0,2008-07-07 00:00:00,0.0,,2013-04-24 00:00:00,1998-05-08 00:00:00,39918.0,,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
27630,7010158.0,31.0,158.0,0.0,2009-05-09 00:00:00,1.0,1917.0,2013-04-11 00:00:00,1994-07-31 00:00:00,23441.0,,78.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
352,1544166.0,16.0,166.0,1.0,2007-05-03 00:00:00,1.0,6578.0,2007-06-25 00:00:00,1996-06-05 00:00:00,24749.0,,91.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,127.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9373,23601319.0,28.0,319.0,1.0,2009-10-12 00:00:00,0.0,,2013-04-02 00:00:00,1999-10-06 00:00:00,10654.0,,81.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,109.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [11]:
columns_always_for_inclusion = [
    'age_at_index_date',
    'female',
#      'non_insomnia_GP_consultations', #removed as VIF 4.3 
     'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
    'epilepsy',
    'mood_stabilisers_and_AEDs_100_pdds',
    'other_sedatives_100_pdds', 
    'antidepressants_100_pdds', 
    'antipsychotics_100_pdds',
    'z_drugs_100_pdds',
    'benzodiazepines_100_pdds',
    'lithium_100_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [12]:
cols_for_inclusion_here = ['insomnia_any']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_any  being retained as mean > 0
mental_illness  being retained as mean > 0
mood_stabilisers_and_AEDs_100_pdds  being retained as mean > 0
lithium_100_pdds  being retained as mean > 0
z_drugs_100_pdds  being retained as mean > 0
benzodiazepines_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                                    Univariate OR    [0.025    0.975]  \
age_at_index_date                        1.000000  0.999728  1.000272   
antidepressants_100_pdds                 1.030238  1.024300  1.036210   
antipsychoti

In [13]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996732,0.99621,0.997256,2.238203e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013089,1.007499,1.018711,4.104032e-06
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.395537,1.316783,1.479002,2.443657e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.535599,1.261641,1.869045,1.885298e-05
female,1.000099,0.972834,1.028129,0.9943737,0.947458,0.902195,0.994992,0.03069791
insomnia_any,1.584746,1.442463,1.741063,8.572512e-22,1.310855,1.185849,1.449038,1.199524e-07
lithium_100_pdds,1.080637,1.040209,1.122635,6.70769e-05,1.044386,1.007952,1.082137,0.01652305
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.750682,1.662633,1.843394,2.158769e-100
other_sedatives_100_pdds,1.045201,1.02088,1.070101,0.0002330376,1.034768,1.011272,1.058809,0.003540339
stroke,1.484821,1.376485,1.601685,1.511566e-24,1.450255,1.337395,1.572638,2.399794e-19


In [14]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.40, (1.32, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.54, (1.26, 1.87), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 0.99), P=0.031"
Insomnia any,"1.58, (1.44, 1.74), P=0.000","1.31, (1.19, 1.45), P=0.000"
Lithium (100 PDDs),"1.08, (1.04, 1.12), P=0.000","1.04, (1.01, 1.08), P=0.017"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.75, (1.66, 1.84), P=0.000"
Other sedatives (100 PDDs),"1.05, (1.02, 1.07), P=0.000","1.03, (1.01, 1.06), P=0.004"
Stroke,"1.48, (1.38, 1.60), P=0.000","1.45, (1.34, 1.57), P=0.000"


### insomnia (continuous variable)

### insomnia tertiles

In [15]:
cols_for_inclusion_here = ['insomnia_count:1_5','insomnia_count:above_5']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0
mental_illness  being retained as mean > 0
mood_stabilisers_and_AEDs_100_pdds  being retained as mean > 0
lithium_100_pdds  being retained as mean > 0
z_drugs_100_pdds  being retained as mean > 0
benzodiazepines_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                                    Univariate OR    [0.025    0.975]  \
age_at_index_date                        1.000000  0.999728  1.000272   
antidepressants_100_pdds    

In [16]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996736,0.996213,0.997259,2.637083e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.013086,1.007496,1.018708,4.125409e-06
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.394974,1.316244,1.478412,2.882819e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.535528,1.261556,1.868999,1.893216e-05
female,1.000099,0.972834,1.028129,0.9943737,0.947423,0.90216,0.994958,0.03059117
insomnia_count:1_5,1.551674,1.409792,1.707835,2.7154129999999996e-19,1.290878,1.165803,1.429373,9.093636e-07
insomnia_count:above_5,2.714286,1.64577,4.476535,9.166635e-05,1.951352,1.171317,3.250852,0.01025257
lithium_100_pdds,1.080637,1.040209,1.122635,6.70769e-05,1.044343,1.007925,1.082077,0.01658153
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.749841,1.661827,1.842515,3.2936179999999997e-100
other_sedatives_100_pdds,1.045201,1.02088,1.070101,0.0002330376,1.034881,1.011379,1.058929,0.00344017


In [17]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39, (1.32, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.54, (1.26, 1.87), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 0.99), P=0.031"
Insomnia count:1 5,"1.55, (1.41, 1.71), P=0.000","1.29, (1.17, 1.43), P=0.000"
Insomnia count:above 5,"2.71, (1.65, 4.48), P=0.000","1.95, (1.17, 3.25), P=0.010"
Lithium (100 PDDs),"1.08, (1.04, 1.12), P=0.000","1.04, (1.01, 1.08), P=0.017"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.75, (1.66, 1.84), P=0.000"
Other sedatives (100 PDDs),"1.05, (1.02, 1.07), P=0.000","1.03, (1.01, 1.06), P=0.003"
