In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
cd ~/demres

/Users/zurfarosa/demres


In [11]:
import os
import sys

# Put the parent of the current working directory on the import path (once),
# presumably so that `import demres` below can be resolved.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [12]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

## Specify dementia subtype

In [13]:
# Dementia subtype to analyse; the value is interpolated into the name of the
# feature CSV read in the load cell.
# Options: 'alzheimers', 'vascular', 'all_dementia'
subtype = 'all_dementia'

## Specify exposure window

In [20]:
# Exposure window label; the value is interpolated into the name of the
# feature CSV read in the load cell.
# Options: '12_to_7', '10_to_5', '8_to_3'
# NOTE(review): the unit of the window bounds is not stated in this notebook — confirm.
window = '12_to_7'

## Load relevant dataframe and create intercept

In [21]:
# Load the pre-computed patient-feature table for the chosen subtype and
# exposure window (path is relative to the current working directory) and
# parse the three date columns as datetimes.
# `infer_datetime_format` was dropped: it is deprecated in pandas 2.x and only
# ever affected parsing speed, not the parsed values. `delimiter=','` was
# dropped because a comma is already the default separator.
pt_features = pd.read_csv(
    'data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window + '.csv',
    parse_dates=['index_date', 'data_end', 'data_start'],
)

In [22]:
pt_features.columns

Index(['patid', 'gender', 'yob', 'pracid', 'index_date', 'isCase',
       'final dementia medcode', 'data_start', 'data_end', 'matchid',
       'age_at_index_date', 'insomnia', 'insomnia_consultations', 'stroke',
       'intellectual_disability',
       'CHD_heart_failure_and_peripheral_vascular_disease', 'hypertension',
       'diabetes', 'clin_sig_alcohol_use', 'mental_illness_non_smi',
       'mental_illness_smi', 'sleep_apnoea', 'current_smoker',
       'chronic_pulmonary_disease', 'CKD', 'mood_stabilisers_100_pdds',
       'benzo_and_z_drugs_100_pdds', 'other_sedatives_100_pdds',
       'fgas_100_pdds', 'sgas_100_pdds', 'sga_depots_100_pdds',
       'fga_depots_100_pdds', 'antidepressants_100_pdds',
       'non_insomnia_GP_consultations'],
      dtype='object')

In [23]:
# Add a constant column of ones to serve as the model intercept.
# NOTE(review): presumably required because the fitting helper does not add a
# constant term itself — confirm against its implementation.
pt_features['intercept'] = 1.0

## Insomnia consultations

In [26]:
# Columns that must not be passed to the models as predictors in this section
# (this includes the dependent variable `isCase` and the benzo/z-drug measure,
# which is analysed in its own section).
columns_not_for_inclusion = [
    'patid', 'yob', 'pracid', 'index_date', 'isCase',
    'final dementia medcode', 'data_start', 'data_end', 'matchid',
    'insomnia',
    'benzo_and_z_drugs_100_pdds',
]

# Keep every other column, preserving the order in which it appears in the frame ...
training_cols = pt_features.columns[~pt_features.columns.isin(columns_not_for_inclusion)].tolist()
# ... then move the exposure of interest to the front of the list.
# `remove` raises ValueError if the column is missing from the frame.
training_cols.remove('insomnia_consultations')
training_cols.insert(0, 'insomnia_consultations')

In [28]:
# Run the analysis helper on the selected columns. The call returns three
# objects, unpacked here as the univariate table, the multivariate table and
# the multivariate model summary; each is displayed in the cells that follow.
# NOTE(review): the helper is brought in by one of the wildcard imports at the
# top of the notebook (presumably demres.demins.statistical_functions) — confirm.
univariate_results, multivariate_results,multivariate_summary = get_univariate_and_multivariate_results(pt_features,training_cols)

Optimization terminated successfully.
         Current function value: 0.684681
         Iterations 7


### *Univariate results*

In [29]:
univariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_consultations,1.0633,0.003
gender,1.0,1.0
age_at_index_date,1.0,1.0
stroke,1.2961,0.003
CHD_heart_failure_and_peripheral_vascular_disease,1.0746,0.152
hypertension,0.9189,0.026
diabetes,1.2102,0.01
mental_illness_non_smi,1.3423,0.0
mental_illness_smi,1.6162,0.0
current_smoker,1.029,0.612


### *Multivariate results*

In [30]:
multivariate_results

Unnamed: 0,odds_ratio,p_value
insomnia_consultations,1.0259,0.225
gender,0.9733,0.549
age_at_index_date,1.0001,0.897
stroke,1.2047,0.044
CHD_heart_failure_and_peripheral_vascular_disease,0.9533,0.418
hypertension,0.8312,0.0
diabetes,1.0729,0.386
mental_illness_non_smi,1.2681,0.0
mental_illness_smi,1.1442,0.331
current_smoker,0.9919,0.895


In [32]:
multivariate_summary.tables[0]

0,1,2,3
Dep. Variable:,isCase,No. Observations:,8770.0
Model:,Logit,Df Residuals:,8752.0
Method:,MLE,Df Model:,17.0
Date:,"Wed, 10 May 2017",Pseudo R-squ.:,0.01221
Time:,15:40:35,Log-Likelihood:,-6004.7
converged:,True,LL-Null:,-6078.9
,,LLR p-value:,4.8079999999999996e-23


## Benzo and z-drug PDDs

In [33]:
# Columns that must not be passed to the models as predictors in this section
# (this includes the dependent variable `isCase` and both insomnia measures).
columns_not_for_inclusion = [
    'patid', 'yob', 'pracid', 'index_date', 'isCase',
    'final dementia medcode', 'data_start', 'data_end', 'matchid',
    'insomnia', 'insomnia_consultations',
    # The banded PDD columns are excluded here; the continuous
    # 'benzo_and_z_drugs_100_pdds' measure is the exposure in this section.
    # NOTE(review): an earlier variant of this cell appears to have used the
    # banded columns as exposures instead — confirm whether that is still needed.
    'benzo_and_z_drugs_1_to_100pdds', 'benzo_and_z_drugs_101_to_1000pdds',
    'benzo_and_z_drugs_more_than_1000pdds',
]

# Keep every other column, preserving the order in which it appears in the frame ...
training_cols = pt_features.columns[~pt_features.columns.isin(columns_not_for_inclusion)].tolist()
# ... then move the exposure of interest to the front of the list.
# `remove` raises ValueError if the column is missing from the frame.
training_cols.remove('benzo_and_z_drugs_100_pdds')
training_cols.insert(0, 'benzo_and_z_drugs_100_pdds')

In [34]:
# Run the analysis helper on the benzo/z-drug column selection. The three
# returned objects are bound to the same names used in the insomnia section,
# so those earlier results are overwritten from this point on.
# NOTE(review): the helper is brought in by one of the wildcard imports at the
# top of the notebook (presumably demres.demins.statistical_functions) — confirm.
univariate_results, multivariate_results,multivariate_summary = get_univariate_and_multivariate_results(pt_features,training_cols)

Optimization terminated successfully.
         Current function value: 0.684724
         Iterations 7


### *Univariate results*

In [35]:
univariate_results

Unnamed: 0,odds_ratio,p_value
benzo_and_z_drugs_100_pdds,1.0131,0.002
gender,1.0,1.0
age_at_index_date,1.0,1.0
stroke,1.2961,0.003
CHD_heart_failure_and_peripheral_vascular_disease,1.0746,0.152
hypertension,0.9189,0.026
diabetes,1.2102,0.01
mental_illness_non_smi,1.3423,0.0
mental_illness_smi,1.6162,0.0
current_smoker,1.029,0.612


### *Multivariate results*

In [36]:
multivariate_results

Unnamed: 0,odds_ratio,p_value
benzo_and_z_drugs_100_pdds,1.0038,0.385
gender,0.9737,0.555
age_at_index_date,1.0001,0.879
stroke,1.2066,0.042
CHD_heart_failure_and_peripheral_vascular_disease,0.9524,0.408
hypertension,0.8311,0.0
diabetes,1.0712,0.397
mental_illness_non_smi,1.2752,0.0
mental_illness_smi,1.1447,0.33
current_smoker,0.9957,0.944


In [37]:
multivariate_summary.tables[0]

0,1,2,3
Dep. Variable:,isCase,No. Observations:,8770.0
Model:,Logit,Df Residuals:,8752.0
Method:,MLE,Df Model:,17.0
Date:,"Wed, 10 May 2017",Pseudo R-squ.:,0.01215
Time:,15:41:04,Log-Likelihood:,-6005.0
converged:,True,LL-Null:,-6078.9
,,LLR p-value:,6.766e-23
