In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [5]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [6]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [7]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [8]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'heart_failure', 'sleep_apnoea', 'chronic_pulmonary_disease',
       'epilepsy', 'mood_stabilisers_100_pdds', 'benzo_and_z_drugs_100_pdds',
       'other_sedatives_100_pdds', 'antipsychotics_100_pdds',
       'antidepressants_100_pdds', 'non_insomnia_GP_consultations',
       'benzo_and_z_drugs_any', 'insomnia_any', 'insomnia_count:0',
       'insomnia_count:1_5', 'insomnia_count:above_5',
       'non_insomnia_GP_consultations:0', 'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_

In [9]:
pt_features.loc['intercept'] = 1.0

In [10]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,heart_failure,sleep_apnoea,chronic_pulmonary_disease,epilepsy,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,other_sedatives_100_pdds,antipsychotics_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,benzo_and_z_drugs_any,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,benzo_and_z_drug_pdds:00000,benzo_and_z_drug_pdds:00001_10,benzo_and_z_drug_pdds:00011_100,benzo_and_z_drug_pdds:00101_1000,benzo_and_z_drug_pdds:01001_10000,benzo_and_z_drug_pdds:10000_and_above,mood_stabiliser_pdds:00000,mood_stabiliser_pdds:00001_10,mood_stabiliser_pdds:00011_100,mood_stabiliser_pdds:00101_1000,mood_stabiliser_pdds:01001_10000,mood_stabiliser_pdds:10000_and_above,mental_illness
20727,9033084.0,19.0,84.0,1.0,2008-06-17 00:00:00,1.0,26270.0,2008-12-01 00:00:00,1993-01-31 00:00:00,11329.0,,89.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.289892,0.258569,0.0,129.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
16045,6237079.0,23.0,79.0,0.0,2009-01-01 00:00:00,0.0,,2013-04-25 00:00:00,1998-07-18 00:00:00,51027.0,,86.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.545616,0.0,0.0,0.748033,118.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
30123,7610400.0,27.0,400.0,1.0,2004-06-29 00:00:00,1.0,7323.0,2011-08-31 00:00:00,1990-01-31 00:00:00,59425.0,,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,162.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
29777,6283202.0,29.0,202.0,1.0,2010-11-01 00:00:00,1.0,11175.0,2013-03-25 00:00:00,1992-02-29 00:00:00,30928.0,,81.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,202.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1143,2710251.0,19.0,251.0,1.0,2010-11-15 00:00:00,1.0,4693.0,2011-01-14 00:00:00,1999-01-27 00:00:00,38383.0,,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
columns_always_for_inclusion = [
    'age_at_index_date',
    'female',
#      'non_insomnia_GP_consultations', #removed as VIF 4.3 
     'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
    'epilepsy',
    'mood_stabilisers_100_pdds',
    'other_sedatives_100_pdds', 
    'antidepressants_100_pdds', 
    'antipsychotics_100_pdds',
    'benzo_and_z_drugs_100_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [12]:
cols_for_inclusion_here = ['insomnia_any','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_any  being retained as mean > 0
mental_illness  being retained as mean > 0


*Stage 1*
Univariate results
                            Univariate OR    [0.025    0.975]        p value
age_at_index_date                1.000000  0.999728  1.000272   9.999447e-01
antidepressants_100_pdds         1.030238  1.024300  1.036210   5.447317e-24
antipsychotics_100_pdds          1.022390  1.010115  1.034813   3.266641e-04
benzo_and_z_drugs_10

In [13]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996724,0.996202,0.997247,1.546296e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.012953,1.007383,1.018553,4.759461e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.009499,1.004292,1.014733,0.000339685
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.393848,1.315189,1.477211,3.879162e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.429295,1.163899,1.755208,0.0006539422
female,1.000099,0.972834,1.028129,0.9943737,0.943962,0.89884,0.99135,0.02102176
insomnia_any,1.584746,1.442463,1.741063,8.572512e-22,1.316914,1.191501,1.455527,6.988712e-08
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.743377,1.655527,1.835888,1.510316e-98
mood_stabilisers_100_pdds,1.031925,1.019629,1.044368,2.769987e-07,1.013363,1.001468,1.0254,0.02756306
other_sedatives_100_pdds,1.045201,1.02088,1.070101,0.0002330376,1.033002,1.009642,1.056903,0.005399791


In [14]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01, (1.00, 1.01), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39, (1.32, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.43, (1.16, 1.76), P=0.001"
Female,"1.00, (0.97, 1.03), P=0.994","0.94, (0.90, 0.99), P=0.021"
Insomnia any,"1.58, (1.44, 1.74), P=0.000","1.32, (1.19, 1.46), P=0.000"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.74, (1.66, 1.84), P=0.000"
Mood stabilisers (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.00, 1.03), P=0.028"
Other sedatives (100 PDDs),"1.05, (1.02, 1.07), P=0.000","1.03, (1.01, 1.06), P=0.005"


### insomnia (continuous variable)

In [15]:
# cols_for_inclusion_here = ['insomnia','benzo_and_z_drugs_100_pdds']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [16]:
# univariate_and_multivariate_results

### insomnia tertiles

In [17]:
cols_for_inclusion_here = ['insomnia_count:1_5','insomnia_count:above_5','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0
mental_illness  being retained as mean > 0


*Stage 1*
Univariate results
                            Univariate OR    [0.025    0.975]        p value
age_at_index_date                1.000000  0.999728  1.000272   9.999447e-01
antidepressants_100_pdds         1.030238  1.024300  1.036210   5.447317e-24
antipsychotics_100_pdds          1.02239

In [18]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996728,0.996205,0.997251,1.829749e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.012961,1.007391,1.018561,4.695999e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.009382,1.004176,1.014615,0.0004012424
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.393303,1.314668,1.476642,4.55147e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.42992,1.164385,1.756009,0.0006447814
female,1.000099,0.972834,1.028129,0.9943737,0.943972,0.898847,0.991363,0.02105188
insomnia_count:1_5,1.551674,1.409792,1.707835,2.7154129999999996e-19,1.296309,1.170843,1.43522,5.831404e-07
insomnia_count:above_5,2.714286,1.64577,4.476535,9.166635e-05,1.974531,1.186144,3.28693,0.008883925
mental_illness,1.577402,1.512989,1.644558,7.586978999999999e-102,1.742636,1.654818,1.835114,2.18611e-98
mood_stabilisers_100_pdds,1.031925,1.019629,1.044368,2.769987e-07,1.013273,1.001382,1.025305,0.0285745


In [19]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01, (1.00, 1.01), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39, (1.31, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.43, (1.16, 1.76), P=0.001"
Female,"1.00, (0.97, 1.03), P=0.994","0.94, (0.90, 0.99), P=0.021"
Insomnia count:1 5,"1.55, (1.41, 1.71), P=0.000","1.30, (1.17, 1.44), P=0.000"
Insomnia count:above 5,"2.71, (1.65, 4.48), P=0.000","1.97, (1.19, 3.29), P=0.009"
Mental illness,"1.58, (1.51, 1.64), P=0.000","1.74, (1.65, 1.84), P=0.000"
Mood stabilisers (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01, (1.00, 1.03), P=0.029"
