In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists,druglists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

  from pandas.core import datetools


In [4]:
pd.set_option('display.max_columns', None)

## Specify exposure window

In [26]:
window = '8_to_3' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [27]:
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [28]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'age_at_index_date', 'insomnia', 'insomnia_no_hypnotics', 'stroke',
       'heart_failure', 'mental_illness', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy', 'hypnotics_100_pdds',
       'age_at_index_date:65-69', 'age_at_index_date:70-74',
       'age_at_index_date:75-79', 'age_at_index_date:80-84',
       'age_at_index_date:85-89', 'age_at_index_date:90-99',
       'age_at_index_date:above_99', 'hypnotic_pdds:00000',
       'hypnotic_pdds:00001_10', 'hypnotic_pdds:00011_100',
       'hypnotic_pdds:00101_1000', 'hypnotic_pdds:01001_10000',
       'hypnotic_pdds:10000_and_above'],
      dtype='object')

In [29]:
pt_features.loc['intercept'] = 1.0

In [30]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,insomnia,insomnia_no_hypnotics,stroke,heart_failure,mental_illness,sleep_apnoea,chronic_pulmonary_disease,epilepsy,hypnotics_100_pdds,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,hypnotic_pdds:00000,hypnotic_pdds:00001_10,hypnotic_pdds:00011_100,hypnotic_pdds:00101_1000,hypnotic_pdds:01001_10000,hypnotic_pdds:10000_and_above
26214,1487115.0,15.0,115.0,1.0,2006-12-18 00:00:00,0.0,,2010-10-18 00:00:00,1996-03-03 00:00:00,7040.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10476,10196480.0,34.0,480.0,1.0,2009-04-20 00:00:00,0.0,,2013-02-12 00:00:00,2001-03-23 00:00:00,87374.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
24160,7162294.0,28.0,294.0,0.0,2010-09-08 00:00:00,0.0,,2013-04-30 00:00:00,1999-02-28 00:00:00,29003.0,82.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
27989,5710309.0,23.0,309.0,1.0,2006-03-23 00:00:00,0.0,,2009-06-02 00:00:00,1994-04-30 00:00:00,35528.0,83.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
21806,3751459.0,22.0,459.0,1.0,2007-02-19 00:00:00,0.0,,2012-10-12 00:00:00,1999-01-31 00:00:00,53311.0,85.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [31]:
columns_for_inclusion = [
    'age_at_index_date',
    'female',
    'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
#     'epilepsy',
    'hypnotics_100_pdds',
    'insomnia'
    
]

## Insomnia consultations

### insomnia yes/no

In [32]:
# workaround for deprecated chisqprob function in statsmodels
stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df) 

In [33]:
training_cols = [col for col in pt_features.columns if col in columns_for_inclusion]
summary_table,univariate_and_multivariate_results_formatted = calculate_univariate_and_multivariate_ORs(pt_features,covariates=training_cols,main_variables=columns_for_inclusion)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
insomnia  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
mental_illness  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
hypnotics_100_pdds  being retained as mean > 0




In [34]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.41, (1.34, 1.48), P=0.000","1.42, (1.34, 1.50), P=0.000"
Female,"1.00, (0.98, 1.03), P=0.995","0.94, (0.90, 0.99), P=0.995"
Heart failure,"1.27, (1.16, 1.40), P=0.000","1.16, (1.05, 1.28), P=0.000"
Hypnotics (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01, (1.01, 1.02), P=0.000"
Insomnia,"1.53, (1.38, 1.70), P=0.000","1.25, (1.12, 1.40), P=0.000"
Mental illness,"1.69, (1.62, 1.76), P=0.000","1.93, (1.84, 2.02), P=0.000"
Sleep apnoea,"1.24, (0.73, 2.10), P=0.424","0.89, (0.52, 1.53), P=0.424"
Stroke,"1.48, (1.38, 1.59), P=0.000","1.45, (1.35, 1.57), P=0.000"


In [35]:
summary_table

0,1,2,3
Dep. Variable:,isCase,No. Observations:,37207.0
Model:,Logit,Df Residuals:,37198.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 31 May 2018",Pseudo R-squ.:,0.0237
Time:,17:50:23,Log-Likelihood:,-25179.0
converged:,True,LL-Null:,-25790.0
,,LLR p-value:,1.509e-258
