In [9]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits
# to imported project modules are picked up live, without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
cd ~/demres

/Users/zurfarosa/demres


In [11]:
import os
import sys

# Put the parent of the current working directory on sys.path (only once),
# presumably so that the local `demres` package imported below can be found.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists,druglists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *
from IPython.display import display

In [12]:
# Workaround: the statsmodels version used here still calls
# scipy.stats.chisqprob, which is no longer available in recent SciPy.
# Re-create it from the chi-squared survival function.
# `stats` is imported explicitly so this cell does not depend on the name
# leaking in through one of the wildcard imports above.
from scipy import stats

stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

In [13]:
# Lift the column cap so wide DataFrames are displayed without truncation.
pd.options.display.max_columns = None

In [14]:
# Names of the patient-feature columns that are used as covariates (and
# reported as main variables) in the odds-ratio analysis below.
columns_for_inclusion = [
    'age_at_index_date', 'female',
    'stroke', 'heart_failure', 'mental_illness',
    'sleep_apnoea', 'chronic_pulmonary_disease',
    'hypnotics_100_pdds', 'insomnia',
]

In [16]:
# For every exposure window in the study design, load the matching
# patient-feature table(s) and display the univariate / multivariate odds
# ratios plus the model summary for the covariates in `columns_for_inclusion`.
for window in sd.exposure_windows:
    print('\n',window['name'],'...')
    # The second exposure window is additionally analysed with the
    # 'avoid specific dementia' variant of the feature file.
    if window == sd.exposure_windows[1]:
        files = ['pt_features','pt_features_avoid_specific_dementia']
    else:
        files = ['pt_features']
    for file in files:
        print(file,':')
        pt_features = pd.read_csv(
            'data/processed_data/' + file + '_' + window['name'] + '.csv',
            delimiter=',',
            parse_dates=['index_date','data_end','data_start'],
            infer_datetime_format=True,
        )
        # Add a constant *column*. The earlier `pt_features.loc['intercept'] = 1.0`
        # instead appended a *row* labelled 'intercept' with every field set
        # to 1.0, i.e. a fabricated record that was handed to the model-fitting
        # helper together with the real patients.
        pt_features['intercept'] = 1.0
        # Keep only the requested covariates that are present in this file.
        training_cols = [col for col in pt_features.columns if col in columns_for_inclusion]
        summary_table,univariate_and_multivariate_results_formatted = calculate_univariate_and_multivariate_ORs(
            pt_features,
            covariates=training_cols,
            main_variables=columns_for_inclusion,
        )
        display(univariate_and_multivariate_results_formatted)
        display(summary_table)


 12_to_7 ...
pt_features :


Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.37, (1.29, 1.46), P=0.000","1.42, (1.32, 1.52), P=0.000"
Female,"1.00, (0.97, 1.04), P=0.993","0.95, (0.89, 1.01), P=0.993"
Heart failure,"1.16, (1.03, 1.30), P=0.014","1.07, (0.95, 1.21), P=0.014"
Hypnotics (100 PDDs),"1.02, (1.01, 1.03), P=0.000","1.01, (1.00, 1.02), P=0.000"
Insomnia,"1.49, (1.35, 1.65), P=0.000","1.28, (1.14, 1.42), P=0.000"
Mental illness,"1.47, (1.40, 1.54), P=0.000","1.68, (1.58, 1.79), P=0.000"
Sleep apnoea,"2.63, (1.16, 5.93), P=0.020","1.83, (0.80, 4.20), P=0.020"
Stroke,"1.50, (1.36, 1.64), P=0.000","1.50, (1.36, 1.66), P=0.000"


0,1,2,3
Dep. Variable:,isCase,No. Observations:,18447.0
Model:,Logit,Df Residuals:,18438.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 09 Aug 2018",Pseudo R-squ.:,0.02175
Time:,21:51:52,Log-Likelihood:,-12508.0
converged:,True,LL-Null:,-12786.0
,,LLR p-value:,6.3830000000000005e-115



 10_to_5 ...
pt_features :


Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.38, (1.31, 1.46), P=0.000","1.40, (1.32, 1.49), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 1.00), P=0.994"
Heart failure,"1.27, (1.15, 1.40), P=0.000","1.18, (1.06, 1.31), P=0.000"
Hypnotics (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01, (1.01, 1.02), P=0.000"
Insomnia,"1.56, (1.42, 1.72), P=0.000","1.31, (1.18, 1.44), P=0.000"
Mental illness,"1.56, (1.50, 1.63), P=0.000","1.78, (1.69, 1.87), P=0.000"
Sleep apnoea,"1.44, (0.79, 2.63), P=0.230","1.04, (0.56, 1.93), P=0.230"
Stroke,"1.47, (1.36, 1.59), P=0.000","1.44, (1.33, 1.56), P=0.000"


0,1,2,3
Dep. Variable:,isCase,No. Observations:,29845.0
Model:,Logit,Df Residuals:,29836.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 09 Aug 2018",Pseudo R-squ.:,0.02235
Time:,21:51:53,Log-Likelihood:,-20225.0
converged:,True,LL-Null:,-20687.0
,,LLR p-value:,2.383e-194


pt_features_avoid_specific_dementia :


Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.39, (1.32, 1.47), P=0.000","1.42, (1.33, 1.51), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 1.00), P=0.994"
Heart failure,"1.24, (1.11, 1.38), P=0.000","1.14, (1.02, 1.28), P=0.000"
Hypnotics (100 PDDs),"1.02, (1.01, 1.03), P=0.000","1.01, (1.00, 1.02), P=0.000"
Insomnia,"1.60, (1.44, 1.77), P=0.000","1.34, (1.20, 1.50), P=0.000"
Mental illness,"1.58, (1.51, 1.65), P=0.000","1.79, (1.70, 1.89), P=0.000"
Sleep apnoea,"1.35, (0.72, 2.53), P=0.345","0.95, (0.50, 1.81), P=0.345"
Stroke,"1.49, (1.38, 1.62), P=0.000","1.47, (1.35, 1.61), P=0.000"


0,1,2,3
Dep. Variable:,isCase,No. Observations:,25759.0
Model:,Logit,Df Residuals:,25750.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 09 Aug 2018",Pseudo R-squ.:,0.02296
Time:,21:51:53,Log-Likelihood:,-17445.0
converged:,True,LL-Null:,-17855.0
,,LLR p-value:,1.0349999999999999e-171



 8_to_3 ...
pt_features :


Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.36, (1.30, 1.42), P=0.000","1.36, (1.29, 1.43), P=0.000"
Female,"1.00, (0.98, 1.02), P=0.995","0.95, (0.91, 0.99), P=0.995"
Heart failure,"1.27, (1.17, 1.39), P=0.000","1.17, (1.07, 1.28), P=0.000"
Hypnotics (100 PDDs),"1.02, (1.02, 1.03), P=0.000","1.01, (1.01, 1.02), P=0.000"
Insomnia,"1.55, (1.40, 1.71), P=0.000","1.27, (1.15, 1.41), P=0.000"
Mental illness,"1.63, (1.57, 1.69), P=0.000","1.85, (1.77, 1.93), P=0.000"
Sleep apnoea,"1.26, (0.76, 2.09), P=0.371","0.94, (0.56, 1.57), P=0.371"
Stroke,"1.48, (1.39, 1.58), P=0.000","1.45, (1.35, 1.55), P=0.000"


0,1,2,3
Dep. Variable:,isCase,No. Observations:,43023.0
Model:,Logit,Df Residuals:,43014.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 09 Aug 2018",Pseudo R-squ.:,0.02138
Time:,21:51:55,Log-Likelihood:,-29184.0
converged:,True,LL-Null:,-29821.0
,,LLR p-value:,5.401e-270
