In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
pd.set_option('display.max_columns', None)

## Specifiy dementia subtype

In [5]:
subtype = 'all_dementia' # options: 'alzheimers', 'vascular','all_dementia'

## Specify exposure window

In [6]:
window = '12_to_7' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [7]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [8]:

pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_start', 'data_end', 'matchid',
       'age_at_index_date', 'non_insomnia_GP_consultations', 'stroke',
       'non_stroke_vascular_disease', 'hypertension', 'diabetes',
       'mental_illness_non_smi', 'mental_illness_smi', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy', 'age_at_index_date:65-69',
       'age_at_index_date:70-74', 'age_at_index_date:75-79',
       'age_at_index_date:80-84', 'age_at_index_date:85-89',
       'age_at_index_date:90-99', 'age_at_index_date:above_99',
       'insomnia_count:0', 'insomnia_count:above_10',
       'non_insomnia_GP_consultations:0', 'insomnia_count:1_5', 'insomnia',
       'insomnia_count:6_10', 'mood_stabilisers_100_pdds',
       'benzo_and_z_drugs_100_pdds', 'other_sedatives_100_pdds',
       'antipsychotics_100_pdds', 'depot_antipsychotics_100_pdds',
       'antidepressants_100_pdds', 'benzo_and_z_drugs_any', 'insomnia_any

In [9]:
pt_features['intercept'] = 1.0

In [10]:
# pt_features.sort_values(by='insomnia_count<=10',ascending=False)

In [11]:
columns_always_for_inclusion = [
    'female',
    'non_insomnia_GP_consultations', 'stroke',
    'non_stroke_vascular_disease', 'hypertension',
    'diabetes', 'mental_illness_non_smi', 'mental_illness_smi',
    'sleep_apnoea', 'chronic_pulmonary_disease', 'epilepsy',
    'mood_stabilisers_pdds',
    'other_sedatives_pdds', 'antidepressants_pdds', 
    'depot_antipsychotics_pdds', 'antipsychotics_pdds',
    'benzo_and_z_drugs_pdds'
]

## Insomnia consultations

### insomnia yes/no

In [12]:
cols_for_inclusion_here = ['insomnia_any','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                               Univariate OR  p value  [0.025  0.975]
benzo_and_z_drugs_100_pdds             1.006    0.021   1.001   1.011
chronic_pulmonary_disease              1.075    0.024   1.010   1.145
diabetes                               1.210    0.000   1.107   1.322
epilepsy                               1.567    0.000   1.246   1.970
female                                 1.000    1.000   0.967   1.034
hypertension                           0.954    0.022   0.916   0.993
insomnia_any                           1.320    0.000   1.230   1.417
mental_illness_non_smi                 1.427    0.000   1.356   1.502
mental_illness_smi                     1.560    0.000   1.372   1.774
non_insomnia_GP_consultations          1.001    0.000   1.001   1.001
non_stroke_vascular_disease            1.102    0.000   1.044   1.163
sleep_apnoea                           1.118    0.739   0.581   2.150
str

In [13]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
benzo_and_z_drugs_100_pdds,1.006,0.021,1.001,1.011,1.0,0.924,0.995,1.005
chronic_pulmonary_disease,1.075,0.024,1.01,1.145,0.983,0.616,0.917,1.053
diabetes,1.21,0.0,1.107,1.322,1.196,0.0,1.088,1.314
epilepsy,1.567,0.0,1.246,1.97,1.381,0.007,1.094,1.743
female,1.0,1.0,0.967,1.034,0.888,0.0,0.85,0.928
hypertension,0.954,0.022,0.916,0.993,0.834,0.0,0.792,0.879
insomnia_any,1.32,0.0,1.23,1.417,1.265,0.0,1.171,1.366
mental_illness_non_smi,1.427,0.0,1.356,1.502,1.504,0.0,1.411,1.602
mental_illness_smi,1.56,0.0,1.372,1.774,1.147,0.053,0.998,1.318
stroke,1.338,0.0,1.219,1.469,1.29,0.0,1.17,1.423


### insomnia (continuous variable)

In [14]:
cols_for_inclusion_here = ['insomnia','benzo_and_z_drugs_100_pdds']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                               Univariate OR  p value  [0.025  0.975]
benzo_and_z_drugs_100_pdds             1.006    0.021   1.001   1.011
chronic_pulmonary_disease              1.075    0.024   1.010   1.145
diabetes                               1.210    0.000   1.107   1.322
epilepsy                               1.567    0.000   1.246   1.970
female                                 1.000    1.000   0.967   1.034
hypertension                           0.954    0.022   0.916   0.993
insomnia                               1.102    0.000   1.069   1.136
mental_illness_non_smi                 1.427    0.000   1.356   1.502
mental_illness_smi                     1.560    0.000   1.372   1.774
non_insomnia_GP_consultations          1.001    0.000   1.001   1.001
non_stroke_vascular_disease            1.102    0.000   1.044   1.163
sleep_apnoea                           1.118    0.739   0.581   2.150
str

In [15]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
benzo_and_z_drugs_100_pdds,1.006,0.021,1.001,1.011,1.0,0.927,0.995,1.005
diabetes,1.21,0.0,1.107,1.322,1.195,0.0,1.088,1.313
epilepsy,1.567,0.0,1.246,1.97,1.385,0.006,1.097,1.747
female,1.0,1.0,0.967,1.034,0.896,0.0,0.858,0.935
hypertension,0.954,0.022,0.916,0.993,0.838,0.0,0.795,0.882
insomnia,1.102,0.0,1.069,1.136,1.077,0.0,1.044,1.112
mental_illness_non_smi,1.427,0.0,1.356,1.502,1.511,0.0,1.419,1.609
mental_illness_smi,1.56,0.0,1.372,1.774,1.146,0.054,0.998,1.317
stroke,1.338,0.0,1.219,1.469,1.297,0.0,1.176,1.431


### insomnia quantiles

In [16]:
cols_for_inclusion_here = ['benzo_and_z_drugs_100_pdds','insomnia_count:1_5','insomnia_count:6_10','insomnia_count:above_10']
training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
summary_table,univariate_and_multivariate_results = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

The following variables are being removed as mean = 0:



*Stage 1*
Univariate results
                               Univariate OR  p value  [0.025  0.975]
benzo_and_z_drugs_100_pdds             1.006    0.021   1.001   1.011
chronic_pulmonary_disease              1.075    0.024   1.010   1.145
diabetes                               1.210    0.000   1.107   1.322
epilepsy                               1.567    0.000   1.246   1.970
female                                 1.000    1.000   0.967   1.034
hypertension                           0.954    0.022   0.916   0.993
insomnia_count:1_5                     1.318    0.000   1.226   1.416
insomnia_count:6_10                    1.394    0.145   0.891   2.180
insomnia_count:above_10                1.400    0.416   0.622   3.152
mental_illness_non_smi                 1.427    0.000   1.356   1.502
mental_illness_smi                     1.560    0.000   1.372   1.774
non_insomnia_GP_consultations          1.001    0.000   1.001   1.001
non

In [17]:
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
benzo_and_z_drugs_100_pdds,1.006,0.021,1.001,1.011,1.0,0.931,0.995,1.005
chronic_pulmonary_disease,1.075,0.024,1.01,1.145,0.983,0.616,0.917,1.053
diabetes,1.21,0.0,1.107,1.322,1.196,0.0,1.088,1.314
epilepsy,1.567,0.0,1.246,1.97,1.381,0.007,1.094,1.743
female,1.0,1.0,0.967,1.034,0.888,0.0,0.85,0.928
hypertension,0.954,0.022,0.916,0.993,0.834,0.0,0.792,0.879
insomnia_count:1_5,1.318,0.0,1.226,1.416,1.266,0.0,1.171,1.369
insomnia_count:6_10,1.394,0.145,0.891,2.18,1.257,0.322,0.8,1.977
insomnia_count:above_10,1.4,0.416,0.622,3.152,1.167,0.712,0.514,2.649
mental_illness_non_smi,1.427,0.0,1.356,1.502,1.504,0.0,1.411,1.602


## Benzo and z-drug PDDs

In [18]:
### benzos yes/no

In [19]:
# cols_for_inclusion_here = ['benzo_and_z_drugs_any']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,result = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [20]:
# result

### benzo PDDs

In [21]:
# cols_for_inclusion_here = ['benzo_and_z_drugs_pdds']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,result = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [22]:
# result

### benzos quantiles

In [23]:
# cols_for_inclusion_here = ['benzo_and_z_drugs_pdds:1_10', 'benzo_and_z_drugs_pdds:11_100',
#        'benzo_and_z_drugs_pdds:101_1000', 'benzo_and_z_drugs_pdds:1001_10000',
#        'benzo_and_z_drugs_pdds:above_10000']
# training_cols = [col for col in pt_features.columns if col in cols_for_inclusion_here+columns_always_for_inclusion]
# summary_table,result = purposefully_select_covariates(pt_features,covariates=training_cols,main_variables=cols_for_inclusion_here)

In [24]:
# result