In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

# Make the parent of the working directory importable. The entry is appended
# only when absent, so re-running the cell does not add duplicates.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
# Remove the cap on the number of DataFrame columns shown in cell output.
pd.set_option("display.max_columns", None)

## Specify dementia subtype

In [5]:
# Dementia subtype for this run; it forms part of the input CSV file name.
# Options: 'alzheimers', 'vascular', 'all_dementia'
subtype = "all_dementia"

## Specify exposure window

In [6]:
# Exposure window label for this run; it forms part of the input CSV file name.
# Options: '12_to_7', '10_to_5', '8_to_3'
window = "8_to_3"

## Load relevant dataframe and create intercept

In [2]:
# Read the processed feature table for the selected subtype and exposure
# window, parsing the three date columns as datetimes.
pt_features = pd.read_csv(
    'data/pt_data/processed_data/pt_features_demins_{}_{}.csv'.format(subtype, window),
    delimiter=',',
    parse_dates=['index_date', 'data_end', 'data_start'],
    infer_datetime_format=True,
)

NameError: name 'pd' is not defined

In [8]:
# List the column names available in the loaded feature table.
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_start', 'data_end', 'matchid',
       'age_at_index_date', 'non_insomnia_GP_consultations', 'insomnia_count',
       'insomnia_count:1-5', 'insomnia_count:>5', 'mood_stabilisers_100_pdds',
       'benzo_and_z_drugs_100_pdds', 'benzo_and_z_drugs:1-1095_pdds',
       'benzo_and_z_drugs:>1096_pdds', 'benzo_and_z_drugs',
       'other_sedatives_100_pdds', 'fgas_100_pdds', 'sgas_100_pdds',
       'sga_depots_100_pdds', 'fga_depots_100_pdds',
       'antidepressants_100_pdds', 'insomnia', 'stroke',
       'CHD_heart_failure_and_peripheral_vascular_disease', 'hypertension',
       'diabetes', 'mental_illness_non_smi', 'mental_illness_smi',
       'sleep_apnoea', 'chronic_pulmonary_disease', 'epilepsy'],
      dtype='object')

In [9]:
# Add a constant column of 1.0 named 'intercept'; it is not in either
# exclusion list, so it ends up among the candidate covariates.
pt_features = pt_features.assign(intercept=1.0)

In [10]:
# pt_features.sort_values(by='insomnia_count<=10',ascending=False)

In [1]:
# Columns excluded from the candidate covariates of every model in this
# notebook (each training_cols comprehension filters on this list).
# Entries are grouped by name prefix; their order is unchanged.
columns_never_for_inclusion = [
    # raw patient-level fields
    'patid', 'yob', 'pracid', 'index_date', 'isCase',
    'final dementia medcode', 'data_start', 'data_end', 'matchid',
    # age_at_index_date bands
    'age_at_index_date:65-69', 'age_at_index_date:70-74',
    'age_at_index_date:75-79', 'age_at_index_date:80-84',
    'age_at_index_date:85-89', 'age_at_index_date:90-99',
    'age_at_index_date:above_99',
    # zero-level bands of the two exposures
    'insomnia_count:0', 'benzo_and_z_drugs_pdds:0',
    # non_insomnia_GP_consultations bands
    'non_insomnia_GP_consultations:0',
    'non_insomnia_GP_consultations:1_10',
    'non_insomnia_GP_consultations:11_100',
    'non_insomnia_GP_consultations:101_1000',
    'non_insomnia_GP_consultations:above_1000',
    # antidepressants_pdds bands
    'antidepressants_pdds:0', 'antidepressants_pdds:1_10',
    'antidepressants_pdds:11_100', 'antidepressants_pdds:101_1000',
    'antidepressants_pdds:1001_10000', 'antidepressants_pdds:above_10000',
    # sgas_pdds bands
    'sgas_pdds:0', 'sgas_pdds:1_10', 'sgas_pdds:11_100',
    'sgas_pdds:101_1000', 'sgas_pdds:1001_10000', 'sgas_pdds:above_10000',
    # fgas_pdds bands
    'fgas_pdds:0', 'fgas_pdds:1_10', 'fgas_pdds:11_100',
    'fgas_pdds:101_1000', 'fgas_pdds:1001_10000', 'fgas_pdds:above_10000',
    # other_sedatives_pdds bands
    'other_sedatives_pdds:0', 'other_sedatives_pdds:1_10',
    'other_sedatives_pdds:11_100', 'other_sedatives_pdds:101_1000',
    'other_sedatives_pdds:1001_10000', 'other_sedatives_pdds:above_10000',
    # mood_stabilisers_pdds bands
    'mood_stabilisers_pdds:0', 'mood_stabilisers_pdds:1_10',
    'mood_stabilisers_pdds:11_100', 'mood_stabilisers_pdds:101_1000',
    'mood_stabilisers_pdds:1001_10000', 'mood_stabilisers_pdds:above_10000',
]

## Insomnia consultations

### insomnia yes/no

In [12]:
# Columns excluded from the "insomnia yes/no" model only: the other encodings
# of the insomnia exposure and the benzodiazepine/z-drug columns listed below.
# NOTE(review): some names here (e.g. 'insomnia_count:1_10') do not appear in
# the pt_features.columns output earlier in this notebook, which shows
# 'insomnia_count:1-5' / 'insomnia_count:>5' -- confirm against the current CSV.
columns_not_for_inclusion_here = [
    'insomnia_count', 'insomnia_count:1_10', 'insomnia_count:above_10',
    'benzo_and_z_drugs_pdds:0',
    'benzo_and_z_drugs_pdds:1_10', 'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000', 'benzo_and_z_drugs_pdds:1001_10000',
    # The comma after the next entry is required: without it Python joins the
    # two adjacent literals into one string and neither column is excluded.
    'benzo_and_z_drugs_pdds:above_10000',
    'benzo_and_z_drugs_100_pdds',
    'benzo_and_z_drugs',
]

# Variable handed to purposefully_select_covariates as main_variables.
cols_to_keep = ['insomnia']

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [13]:
# Show the candidate covariates that remain after both exclusion lists.
training_cols

['female',
 'age_at_index_date',
 'non_insomnia_GP_consultations',
 'mood_stabilisers_100_pdds',
 'other_sedatives_100_pdds',
 'fgas_100_pdds',
 'sgas_100_pdds',
 'sga_depots_100_pdds',
 'fga_depots_100_pdds',
 'antidepressants_100_pdds',
 'insomnia',
 'stroke',
 'CHD_heart_failure_and_peripheral_vascular_disease',
 'hypertension',
 'diabetes',
 'mental_illness_non_smi',
 'mental_illness_smi',
 'sleep_apnoea',
 'chronic_pulmonary_disease',
 'epilepsy',
 'intercept']

In [14]:
# Run the project's covariate-selection routine for the insomnia yes/no
# exposure; it returns a summary table and a result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
mood_stabilisers_100_pdds                              1.0394    0.000
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
insomnia                                               1.2294    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular

In [15]:
# summary_table

In [16]:
# Display the `result` table returned by purposefully_select_covariates for
# the insomnia yes/no exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
insomnia,1.159,0.148,0.002,1.057,1.272
stroke,1.28,0.247,0.0,1.158,1.414
hypertension,0.84,-0.174,0.0,0.795,0.889
diabetes,1.131,0.123,0.014,1.025,1.248
mental_illness_non_smi,1.537,0.43,0.0,1.445,1.634
epilepsy,1.379,0.321,0.007,1.091,1.741
intercept,0.791,-0.235,0.0,0.753,0.83


### insomnia count

In [17]:
# Columns excluded from the "insomnia count" model only: the other encodings
# of the insomnia exposure and the benzodiazepine/z-drug columns listed below.
# NOTE(review): some names here (e.g. 'insomnia_count:1_10') do not appear in
# the pt_features.columns output earlier in this notebook -- confirm against
# the current CSV.
columns_not_for_inclusion_here = [
    'insomnia', 'insomnia_count:1_10', 'insomnia_count:above_10',
    'benzo_and_z_drugs_pdds:0',
    'benzo_and_z_drugs_pdds:1_10', 'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000', 'benzo_and_z_drugs_pdds:1001_10000',
    # The comma after the next entry is required: without it Python joins the
    # two adjacent literals into one string and neither column is excluded.
    'benzo_and_z_drugs_pdds:above_10000',
    'benzo_and_z_drugs_100_pdds',
    'benzo_and_z_drugs',
]

# Variable handed to purposefully_select_covariates as main_variables.
# The string literal and the list were left unterminated, which made the
# whole cell a SyntaxError; both are closed here.
cols_to_keep = ['insomnia_count']

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [18]:
# Run the project's covariate-selection routine for the insomnia count
# exposure; it returns a summary table and a result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
insomnia_count                                         1.1153    0.000
mood_stabilisers_100_pdds                              1.0394    0.000
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular

In [19]:
# summary_table

In [20]:
# Display the `result` table returned by purposefully_select_covariates for
# the insomnia count exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
insomnia_count,1.078,0.075,0.003,1.026,1.132
stroke,1.281,0.247,0.0,1.159,1.415
hypertension,0.84,-0.174,0.0,0.795,0.889
diabetes,1.13,0.122,0.015,1.024,1.246
mental_illness_non_smi,1.537,0.43,0.0,1.445,1.634
epilepsy,1.381,0.323,0.007,1.093,1.744
intercept,0.793,-0.232,0.0,0.755,0.833


### insomnia quantiles

In [21]:
# Columns excluded from the "insomnia quantiles" model only: the yes/no and
# raw-count insomnia encodings and the benzodiazepine/z-drug columns below.
columns_not_for_inclusion_here = [
    'insomnia', 'insomnia_count',
    'benzo_and_z_drugs_pdds:0',
    'benzo_and_z_drugs_pdds:1_10', 'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000', 'benzo_and_z_drugs_pdds:1001_10000',
    # The comma after the next entry is required: without it Python joins the
    # two adjacent literals into one string and neither column is excluded.
    'benzo_and_z_drugs_pdds:above_10000',
    'benzo_and_z_drugs_100_pdds',
    'benzo_and_z_drugs',
]

# Variables handed to purposefully_select_covariates as main_variables.
# NOTE(review): the pt_features.columns output earlier in this notebook shows
# 'insomnia_count:1-5' / 'insomnia_count:>5' rather than these names -- confirm
# which naming the current CSV uses.
cols_to_keep = [
    'insomnia_count:1_10',
    'insomnia_count:above_10',
]

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [22]:
# Run the project's covariate-selection routine for the binned insomnia
# exposure; it returns a summary table and a result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
insomnia_count:1-5                                     1.2240    0.000
insomnia_count:>5                                      1.7778    0.167
mood_stabilisers_100_pdds                              1.0394    0.000
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
stroke                                   

In [23]:
# summary_table

In [24]:
# Display the `result` table returned by purposefully_select_covariates for
# the binned insomnia exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
insomnia_count:1-5,1.158,0.146,0.002,1.055,1.271
insomnia_count:>5,1.324,0.281,0.506,0.579,3.028
stroke,1.28,0.247,0.0,1.159,1.414
hypertension,0.841,-0.174,0.0,0.795,0.889
diabetes,1.131,0.123,0.014,1.025,1.248
mental_illness_non_smi,1.536,0.429,0.0,1.445,1.634
epilepsy,1.379,0.321,0.007,1.092,1.742
intercept,0.791,-0.235,0.0,0.753,0.83


## Benzo and z-drug PDDs

In [25]:
### benzos yes/no

In [26]:
# Columns excluded from the "benzos yes/no" model only: all insomnia
# encodings and the other benzodiazepine/z-drug encodings.
# NOTE(review): this list names 'benzo_and_z_drugs_pdds', whereas the insomnia
# sections and the pt_features.columns output use 'benzo_and_z_drugs_100_pdds'
# -- confirm which column name is intended.
columns_not_for_inclusion_here = [
    'insomnia_count:0',
    'insomnia_count:1_10',
    'insomnia_count:above_10',
    'insomnia',
    'insomnia_count',
    'benzo_and_z_drugs_pdds:1_10',
    'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000',
    'benzo_and_z_drugs_pdds:1001_10000',
    'benzo_and_z_drugs_pdds:above_10000',
    'benzo_and_z_drugs_pdds',
]

# Variable handed to purposefully_select_covariates as main_variables.
cols_to_keep = ['benzo_and_z_drugs']

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [27]:
# Run the project's covariate-selection routine for the benzodiazepine/z-drug
# yes/no exposure; it returns a summary table and a result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
mood_stabilisers_100_pdds                              1.0394    0.000
benzo_and_z_drugs                                      1.2302    0.000
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular

In [28]:
# summary_table

In [29]:
# Display the `result` table returned by purposefully_select_covariates for
# the benzodiazepine/z-drug yes/no exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
benzo_and_z_drugs,1.031,0.031,0.564,0.929,1.145
stroke,1.279,0.246,0.0,1.158,1.413
hypertension,0.841,-0.174,0.0,0.795,0.889
diabetes,1.125,0.117,0.019,1.019,1.241
mental_illness_non_smi,1.542,0.433,0.0,1.449,1.64
epilepsy,1.379,0.321,0.007,1.091,1.741
intercept,0.796,-0.228,0.0,0.758,0.836


### benzo PDDs

In [30]:
# Columns excluded from the "benzo PDDs" model only: all insomnia encodings,
# the binned benzodiazepine/z-drug columns and the yes/no benzo flag.
# NOTE(review): the pt_features.columns output earlier in this notebook names
# the continuous column 'benzo_and_z_drugs_100_pdds', not
# 'benzo_and_z_drugs_pdds' -- confirm against the current CSV.
columns_not_for_inclusion_here = [
    'insomnia_count:0', 'insomnia_count:1_10', 'insomnia_count:above_10', 'insomnia', 'insomnia_count',
    'benzo_and_z_drugs_pdds:1_10', 'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000', 'benzo_and_z_drugs_pdds:1001_10000',
    # The comma after the next entry is required: without it Python joins the
    # two adjacent literals into one string and neither column is excluded.
    'benzo_and_z_drugs_pdds:above_10000',
    'benzo_and_z_drugs',
]

# Variable handed to purposefully_select_covariates as main_variables.
# NOTE(review): the pt_features.columns output earlier in this notebook lists
# 'benzo_and_z_drugs_100_pdds' but no 'benzo_and_z_drugs_pdds' -- confirm.
cols_to_keep = ['benzo_and_z_drugs_pdds']

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [31]:
# Run the project's covariate-selection routine for the continuous
# benzodiazepine/z-drug PDD exposure; it returns a summary table and a
# result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
mood_stabilisers_100_pdds                              1.0394    0.000
benzo_and_z_drugs_100_pdds                             1.0111    0.039
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
stroke                                                 1.3381    0.000
CHD_heart_failure_and_peripheral_vascular

In [32]:
# summary_table

In [33]:
# Display the `result` table returned by purposefully_select_covariates for
# the continuous benzodiazepine/z-drug PDD exposure.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
benzo_and_z_drugs_100_pdds,0.994,-0.006,0.301,0.983,1.005
antidepressants_100_pdds,1.012,0.011,0.036,1.001,1.022
stroke,1.277,0.245,0.0,1.156,1.411
hypertension,0.841,-0.173,0.0,0.795,0.889
diabetes,1.124,0.117,0.02,1.019,1.24
mental_illness_non_smi,1.527,0.424,0.0,1.434,1.627
epilepsy,1.38,0.322,0.007,1.092,1.743
intercept,0.798,-0.226,0.0,0.759,0.838


### benzos quantiles

In [34]:
# Columns excluded from the "benzos quantiles" model only: all insomnia
# encodings plus the continuous and yes/no benzodiazepine/z-drug columns.
columns_not_for_inclusion_here = [
    'insomnia_count:0',
    'insomnia_count:1_10',
    'insomnia_count:above_10',
    'insomnia',
    'insomnia_count',
    'benzo_and_z_drugs_pdds',
    'benzo_and_z_drugs',
]

# Variables handed to purposefully_select_covariates as main_variables.
# NOTE(review): the pt_features.columns output earlier in this notebook shows
# 'benzo_and_z_drugs:1-1095_pdds' / 'benzo_and_z_drugs:>1096_pdds' rather than
# these names -- confirm which naming the current CSV uses.
cols_to_keep = [
    'benzo_and_z_drugs_pdds:1_10',
    'benzo_and_z_drugs_pdds:11_100',
    'benzo_and_z_drugs_pdds:101_1000',
    'benzo_and_z_drugs_pdds:1001_10000',
    'benzo_and_z_drugs_pdds:above_10000',
]

# Candidate covariates: every column of pt_features, in its existing order,
# that is in neither exclusion list.
training_cols = [
    name
    for name in pt_features.columns
    if name not in columns_never_for_inclusion
    and name not in columns_not_for_inclusion_here
]

In [35]:
# Run the project's covariate-selection routine for the binned
# benzodiazepine/z-drug PDD exposure; it returns a summary table and a
# result table.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_to_keep,
)

The following variables are being removed as mean = 0:

sga_depots_100_pdds


*Stage 1*
Univariate results
                                                   odds_ratio  p_value
female                                                 1.0000    1.000
age_at_index_date                                      1.0000    1.000
non_insomnia_GP_consultations                          1.0010    0.000
mood_stabilisers_100_pdds                              1.0394    0.000
benzo_and_z_drugs:1-1095_pdds                          1.2377    0.000
benzo_and_z_drugs:>1096_pdds                           1.2000    0.105
other_sedatives_100_pdds                               0.9904    0.592
fgas_100_pdds                                          1.0164    0.124
sgas_100_pdds                                          1.0502    0.190
fga_depots_100_pdds                                    0.5368    0.383
antidepressants_100_pdds                               1.0337    0.000
stroke                                   

In [36]:
# summary_table

In [37]:
        result

Unnamed: 0,OR,coef,p,[0.025,0.975]
non_insomnia_GP_consultations,1.002,0.002,0.0,1.001,1.003
benzo_and_z_drugs:1-1095_pdds,1.056,0.055,0.35,0.942,1.186
benzo_and_z_drugs:>1096_pdds,0.931,-0.071,0.537,0.742,1.168
stroke,1.279,0.246,0.0,1.158,1.413
hypertension,0.84,-0.174,0.0,0.794,0.889
diabetes,1.125,0.118,0.019,1.019,1.241
mental_illness_non_smi,1.543,0.433,0.0,1.45,1.641
epilepsy,1.38,0.322,0.007,1.093,1.743
intercept,0.796,-0.228,0.0,0.758,0.836
