In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [5]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [6]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [83]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [84]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'non_stroke_vascular_disease', 'hypertension', 'diabetes',
       'mental_illness_non_smi', 'mental_illness_smi', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy',
       'non_insomnia_GP_consultations', 'benzo_and_z_drugs_any',
       'insomnia_any', 'insomnia_count:0', 'insomnia_count:1_5',
       'insomnia_count:above_5', 'non_insomnia_GP_consultations:0',
       'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_index_date:90-99', 'age_at_index_date:above_99',
   

In [9]:
pt_features.loc['intercept'] = 1.0

In [10]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,non_stroke_vascular_disease,hypertension,diabetes,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,chronic_pulmonary_disease,epilepsy,non_insomnia_GP_consultations,benzo_and_z_drugs_any,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,depot_antipsychotic_pdds:00000,depot_antipsychotic_pdds:00001_10,depot_antipsychotic_pdds:00011_100,depot_antipsychotic_pdds:00101_1000,depot_antipsychotic_pdds:01001_10000,depot_antipsychotic_pdds:10000_and_above,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,benzo_and_z_drug_pdds:00000,benzo_and_z_drug_pdds:00001_10,benzo_and_z_drug_pdds:00011_100,benzo_and_z_drug_pdds:00101_1000,benzo_and_z_drug_pdds:01001_10000,benzo_and_z_drug_pdds:10000_and_above,mood_stabiliser_pdds:00000,mood_stabiliser_pdds:00001_10,mood_stabiliser_pdds:00011_100,mood_stabiliser_pdds:00101_1000,mood_stabiliser_pdds:01001_10000,mood_stabiliser_pdds:10000_and_above,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,other_sedatives_100_pdds,antipsychotics_100_pdds,depot_antipsychotics_100_pdds,antidepressants_100_pdds
11051,14387670.0,35.0,670.0,0.0,2011-01-12 00:00:00,1.0,4693.0,2012-12-31 00:00:00,1999-04-02 00:00:00,93592.0,,76.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,183.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5201,15791424.0,35.0,424.0,1.0,2010-07-14 00:00:00,0.0,,2013-05-10 00:00:00,2000-07-12 00:00:00,90475.0,,75.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,169.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20826,7816355.0,15.0,355.0,1.0,2006-01-01 00:00:00,0.0,,2009-12-08 00:00:00,1994-01-31 00:00:00,7058.0,,91.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,90.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6583,1327134.0,12.0,134.0,1.0,2005-02-18 00:00:00,1.0,8195.0,2006-03-15 00:00:00,1993-01-31 00:00:00,19770.0,,93.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,49.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25068,178021.0,33.0,21.0,0.0,2004-01-13 00:00:00,0.0,,2010-02-08 00:00:00,1993-01-31 00:00:00,59147.0,,71.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,129.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,12.657964,0.0,0.0,0.0,0.0


In [74]:
columns_always_for_inclusion = [
    'age_at_index_date',
    'female',
#      'non_insomnia_GP_consultations', removed as VIF 4.3 
     'stroke',
    'heart_failure',
#     'non_stroke_vascular_disease',
#     'hypertension',
#     'diabetes', 
    'mental_illness_non_smi', 
    'mental_illness_smi',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
    'epilepsy',
    'mood_stabilisers_100_pdds',
    'other_sedatives_100_pdds', 'antidepressants_100_pdds', 
    'depot_antipsychotics_100_pdds', 'antipsychotics_100_pdds',
    'benzo_and_z_drugs_100_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [75]:
cols_for_inclusion_here = ['insomnia_any','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
insomnia_any  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR    [0.025    0.975]  \
age_at_index_date                   1.000000  0.999728  1.000272   
antidepressants_100_pdds            1.030238  1.024300  1.036210   
antipsychotics_100_pdds             1.022390  1.01011

In [76]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996753,0.996231,0.997276,5.1021239999999995e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.012964,1.007391,1.018568,4.74296e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.009535,1.004326,1.014771,0.0003237336
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.392415,1.313832,1.475697,5.777402e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.429099,1.163825,1.754836,0.0006541487
female,1.000099,0.972834,1.028129,0.9943737,0.944712,0.899563,0.992127,0.02282835
insomnia_any,1.584746,1.442463,1.741063,8.572512e-22,1.317083,1.191657,1.45571,6.886146e-08
mental_illness_non_smi,1.581271,1.516243,1.649088,1.765649e-101,1.740605,1.652508,1.833398,3.938216e-97
mood_stabilisers_100_pdds,1.031925,1.019629,1.044368,2.769987e-07,1.014036,1.002077,1.026138,0.02129384
other_sedatives_100_pdds,1.045201,1.02088,1.070101,0.0002330376,1.032833,1.009483,1.056723,0.005623521


In [77]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00 (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01 (1.00, 1.01), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39 (1.31, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.43 (1.16, 1.75), P=0.001"
Female,"1.00, (0.97, 1.03), P=0.994","0.94 (0.90, 0.99), P=0.023"
Insomnia any,"1.58, (1.44, 1.74), P=0.000","1.32 (1.19, 1.46), P=0.000"
Mental illness non smi,"1.58, (1.52, 1.65), P=0.000","1.74 (1.65, 1.83), P=0.000"
Mood stabilisers (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.00, 1.03), P=0.021"
Other sedatives (100 PDDs),"1.05, (1.02, 1.07), P=0.000","1.03 (1.01, 1.06), P=0.006"


### insomnia (continuous variable)

In [78]:
# cols_for_inclusion_here = ['insomnia','benzo_and_z_drugs_100_pdds']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [79]:
# univariate_and_multivariate_results

### insomnia quantiles

In [80]:
cols_for_inclusion_here = ['insomnia_count:1_5','insomnia_count:above_5','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results,univariate_and_multivariate_results_formatted = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
stroke  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR    [0.025    0.975]  \
age_at_index_date                   1.000000  0.999728  1.000272   
antidepressants_100_pdds            1.030238  1.024300  1.036210

In [81]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,[0.025,0.975],p value,Multivariate OR,multi [0.025,multi 0.975],multi p value
age_at_index_date,1.0,0.999728,1.000272,0.9999447,0.996757,0.996234,0.997279,6.0378579999999994e-34
antidepressants_100_pdds,1.030238,1.0243,1.03621,5.447317e-24,1.012972,1.007399,1.018577,4.67738e-06
benzo_and_z_drugs_100_pdds,1.021076,1.015973,1.026203,3.349874e-16,1.009419,1.004211,1.014654,0.0003819571
chronic_pulmonary_disease,1.373408,1.304589,1.445857,1.092113e-33,1.391876,1.313317,1.475134,6.765812e-29
epilepsy,1.751515,1.446632,2.120653,9.239646e-09,1.429719,1.164306,1.755633,0.0006450904
female,1.000099,0.972834,1.028129,0.9943737,0.944722,0.89957,0.99214,0.02286035
insomnia_count:1_5,1.551674,1.409792,1.707835,2.7154129999999996e-19,1.296611,1.171121,1.435548,5.690171e-07
insomnia_count:above_5,2.714286,1.64577,4.476535,9.166635e-05,1.970275,1.183583,3.279859,0.009102696
mental_illness_non_smi,1.581271,1.516243,1.649088,1.765649e-101,1.739826,1.651762,1.832584,5.785961e-97
mood_stabilisers_100_pdds,1.031925,1.019629,1.044368,2.769987e-07,1.013947,1.001992,1.026044,0.02209303


In [82]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00 (1.00, 1.00), P=0.000"
Antidepressants (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.01, 1.02), P=0.000"
Benzodiazepines and z-drugs (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01 (1.00, 1.01), P=0.000"
Chronic pulmonary disease,"1.37, (1.30, 1.45), P=0.000","1.39 (1.31, 1.48), P=0.000"
Epilepsy,"1.75, (1.45, 2.12), P=0.000","1.43 (1.16, 1.76), P=0.001"
Female,"1.00, (0.97, 1.03), P=0.994","0.94 (0.90, 0.99), P=0.023"
Insomnia count:1 5,"1.55, (1.41, 1.71), P=0.000","1.30 (1.17, 1.44), P=0.000"
Insomnia count:above 5,"2.71, (1.65, 4.48), P=0.000","1.97 (1.18, 3.28), P=0.009"
Mental illness non smi,"1.58, (1.52, 1.65), P=0.000","1.74 (1.65, 1.83), P=0.000"
Mood stabilisers (100 PDDs),"1.03, (1.02, 1.04), P=0.000","1.01 (1.00, 1.03), P=0.022"
