In [22]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
cd ~/demres

/Users/zurfarosa/demres


In [24]:
import os
import sys

# Put the parent of the current working directory on the import path (once),
# so packages that live next to / above the notebook's directory can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [25]:
# Remove pandas' cap on displayed columns so wide frames are shown in full.
pd.set_option('display.max_columns', None)

## Specify dementia subtype

In [26]:
# Dementia subtype to analyse; this string is spliced into the name of the
# feature CSV that is loaded further down.
subtype = 'all_dementia' # options: 'alzheimers', 'vascular', 'all_dementia'

## Specify exposure window

In [27]:
# Exposure window to analyse; this string is spliced into the name of the
# feature CSV that is loaded further down.
# NOTE(review): presumably years before the index date (e.g. 10 to 5) - confirm.
window = '10_to_5' # options: '12_to_7', '10_to_5', '8_to_3'

## Load relevant dataframe and create intercept

In [28]:
# Load the processed patient-feature table for the chosen subtype and exposure
# window. The path is relative, so it depends on the current working directory.
# The three date columns are parsed to datetimes on read.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (format inference is the default there) and triggers a FutureWarning.
pt_features_path = 'data/pt_data/processed_data/pt_features_demins_{}_{}.csv'.format(subtype, window)
pt_features = pd.read_csv(
    pt_features_path,
    delimiter=',',
    parse_dates=['index_date', 'data_end', 'data_start'],
)

In [29]:
# List the columns available in the loaded feature table.
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_start', 'data_end', 'matchid',
       'age_at_index_date', 'non_insomnia_GP_consultations', 'stroke',
       'CHD_heart_failure_and_peripheral_vascular_disease', 'hypertension',
       'diabetes', 'mental_illness_non_smi', 'mental_illness_smi',
       'sleep_apnoea', 'chronic_pulmonary_disease', 'epilepsy',
       'age_at_index_date:65-69', 'age_at_index_date:70-74',
       'age_at_index_date:75-79', 'age_at_index_date:80-84',
       'age_at_index_date:85-89', 'age_at_index_date:90-99',
       'age_at_index_date:above_99', 'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'antidepressants_pdds:0',
       'antidepressants_pdds:1_10', 'antidepressants_pdds:11_100',
       'antidepressants_pdds:101_1000', 'antidepressants_pdds:1001_10000',
       'anti

In [30]:
# Add a constant column of 1.0 to the feature table.
# NOTE(review): presumably consumed as the regression intercept by the model
# helper; it is not passed explicitly in the covariate lists below - confirm.
pt_features['intercept'] = 1.0

In [31]:
# pt_features.sort_values(by='insomnia_count<=10',ascending=False)

In [35]:
# Covariates offered to every model in this notebook, in addition to the
# exposure column(s) chosen in each section. One name per line for easy diffing.
columns_always_for_inclusion = [
    'female',
    'non_insomnia_GP_consultations',
    # condition columns
    'stroke',
    'CHD_heart_failure_and_peripheral_vascular_disease',
    'hypertension',
    'diabetes',
    'mental_illness_non_smi',
    'mental_illness_smi',
    'sleep_apnoea',
    'chronic_pulmonary_disease',
    'epilepsy',
    # medication *_pdds columns
    'mood_stabilisers_pdds',
    'other_sedatives_pdds',
    'antidepressants_pdds',
    'depot_antipsychotics_pdds',
    'antipsychotics_pdds',
]

## Insomnia consultations

### insomnia yes/no

In [47]:
# Exposure for this section (insomnia yes/no): the 'insomnia_any' column.
cols_for_inclusion_here = ['insomnia_any']
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
mood_stabilisers_pdds                                  1.0003

In [48]:
# Show the `result` table returned by purposefully_select_covariates for the
# insomnia yes/no exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
stroke,1.288,0.253,0.0,1.167,1.42
hypertension,0.836,-0.18,0.0,0.793,0.88
diabetes,1.193,0.177,0.0,1.086,1.311
mental_illness_non_smi,1.507,0.41,0.0,1.417,1.602
epilepsy,1.374,0.318,0.007,1.089,1.735
antidepressants_pdds,1.0,0.0,0.003,1.0,1.0
insomnia_any,1.231,0.208,0.0,1.135,1.335
female,0.889,-0.118,0.0,0.851,0.928


### insomnia count

In [49]:
# Exposure for this section (insomnia count): the 'insomnia' column.
cols_for_inclusion_here = ['insomnia']
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
mood_stabilisers_pdds                                  1.0003

In [50]:
# Show the `result` table returned by purposefully_select_covariates for the
# insomnia count exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
stroke,1.292,0.256,0.0,1.171,1.425
hypertension,0.837,-0.178,0.0,0.795,0.882
diabetes,1.194,0.177,0.0,1.086,1.312
mental_illness_non_smi,1.509,0.411,0.0,1.42,1.604
epilepsy,1.378,0.321,0.007,1.092,1.74
antidepressants_pdds,1.0,0.0,0.003,1.0,1.0
insomnia,1.098,0.093,0.0,1.057,1.14
female,0.893,-0.113,0.0,0.856,0.933


### insomnia quantiles

In [55]:
# Exposures for this section: the banded insomnia-count indicator columns.
cols_for_inclusion_here = [
    'insomnia_count:1_10',
    'insomnia_count:above_10',
]
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
mood_stabilisers_pdds                                  1.0003

In [56]:
# Show the `result` table returned by purposefully_select_covariates for the
# banded insomnia-count exposures.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
stroke,1.288,0.253,0.0,1.167,1.421
hypertension,0.836,-0.18,0.0,0.793,0.88
diabetes,1.193,0.177,0.0,1.086,1.311
mental_illness_non_smi,1.507,0.41,0.0,1.418,1.602
epilepsy,1.374,0.318,0.007,1.089,1.734
antidepressants_pdds,1.0,0.0,0.003,1.0,1.0
insomnia_count:1_10,1.233,0.209,0.0,1.136,1.337
insomnia_count:above_10,1.02,0.02,0.969,0.376,2.771
female,0.889,-0.118,0.0,0.851,0.928


## Benzo and z-drug PDDs

In [None]:
### benzos yes/no

In [62]:
# Exposure for this section (benzos yes/no): the 'benzo_and_z_drugs_any' column.
cols_for_inclusion_here = ['benzo_and_z_drugs_any']
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
benzo_and_z_drugs_any                                  1.2573

In [63]:
# Show the `result` table returned by purposefully_select_covariates for the
# benzo/z-drug yes/no exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
stroke,1.295,0.259,0.0,1.174,1.429
hypertension,0.842,-0.172,0.0,0.799,0.886
diabetes,1.192,0.175,0.0,1.084,1.309
mental_illness_non_smi,1.522,0.42,0.0,1.432,1.619
epilepsy,1.382,0.324,0.006,1.095,1.744
benzo_and_z_drugs_any,1.089,0.085,0.087,0.988,1.201
antidepressants_pdds,1.0,0.0,0.007,1.0,1.0
female,0.9,-0.105,0.0,0.862,0.94


### benzo PDDs

In [66]:
# Exposure for this section (benzo PDDs): the 'benzo_and_z_drugs_pdds' column.
cols_for_inclusion_here = ['benzo_and_z_drugs_pdds']
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
mood_stabilisers_pdds                                  1.0003

In [67]:
# Show the `result` table returned by purposefully_select_covariates for the
# benzo/z-drug PDD exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.001,0.001,0.0,1.001,1.002
stroke,1.255,0.227,0.0,1.136,1.386
hypertension,0.809,-0.212,0.0,0.766,0.855
diabetes,1.122,0.115,0.022,1.017,1.239
mental_illness_non_smi,1.486,0.396,0.0,1.395,1.582
epilepsy,1.345,0.296,0.013,1.065,1.698
benzo_and_z_drugs_pdds,1.0,-0.0,0.519,1.0,1.0
antidepressants_pdds,1.0,0.0,0.006,1.0,1.0
female,0.868,-0.142,0.0,0.828,0.91


### benzos quantiles

In [73]:
# Exposures for this section: the banded benzo/z-drug PDD indicator columns.
cols_for_inclusion_here = [
    'benzo_and_z_drugs_pdds:1_10',
    'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000',
    'benzo_and_z_drugs_pdds:1001_10000',
    'benzo_and_z_drugs_pdds:above_10000',
]
# Build the lookup once instead of re-concatenating the two lists for every
# column. Iterating pt_features.columns keeps the frame's column order and
# silently skips any requested name that is not present in the frame.
candidate_cols = set(cols_for_inclusion_here) | set(columns_always_for_inclusion)
training_cols = [col for col in pt_features.columns if col in candidate_cols]
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

The following variables are being removed as mean = 0:

benzo_and_z_drugs_pdds:above_10000


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1020    0.000
hypertension                                           0.9536    0.022
diabetes                                               1.2097    0.000
mental_illness_non_smi                                 1.4272    0.000
mental_illness_smi                                     1.5602    0.000
sleep_apnoea                                           1.1176    0.739
chronic_pulmonary_disease                              1.0750    0.024
epilepsy                                               1.5667    0.000
benzo_and_z_drugs_pdds:1_1

In [74]:
# Show the `result` table returned by purposefully_select_covariates for the
# banded benzo/z-drug PDD exposures.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.001,0.001,0.0,1.001,1.002
stroke,1.255,0.228,0.0,1.137,1.387
hypertension,0.81,-0.211,0.0,0.767,0.856
diabetes,1.125,0.117,0.02,1.019,1.241
mental_illness_non_smi,1.478,0.391,0.0,1.388,1.574
epilepsy,1.343,0.295,0.013,1.064,1.696
benzo_and_z_drugs_pdds:1_10,0.997,-0.003,0.982,0.74,1.342
benzo_and_z_drugs_pdds:11_100,1.179,0.165,0.046,1.003,1.386
benzo_and_z_drugs_pdds:101_1000,1.044,0.043,0.638,0.873,1.248
benzo_and_z_drugs_pdds:1001_10000,0.962,-0.038,0.661,0.81,1.143
