In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor 

from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists,druglists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

  from pandas.core import datetools


In [4]:
pd.set_option('display.max_columns', None)

## Specify exposure window

In [46]:
window = '10_to_5' #options: '12_to_7','10_to_5','8_to_3'

## Load relevant dataframe and create intercept

In [47]:
pt_features = pd.read_csv('data/processed_data/pt_features_demins_' + window +'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [48]:
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'age_at_index_date', 'insomnia', 'insomnia_no_hypnotics', 'stroke',
       'heart_failure', 'mental_illness', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy', 'hypnotics_100_pdds',
       'age_at_index_date:65-69', 'age_at_index_date:70-74',
       'age_at_index_date:75-79', 'age_at_index_date:80-84',
       'age_at_index_date:85-89', 'age_at_index_date:90-99',
       'age_at_index_date:above_99', 'hypnotic_pdds:00000',
       'hypnotic_pdds:00001_10', 'hypnotic_pdds:00011_100',
       'hypnotic_pdds:00101_1000', 'hypnotic_pdds:01001_10000',
       'hypnotic_pdds:10000_and_above'],
      dtype='object')

In [49]:
pt_features.loc['intercept'] = 1.0

In [50]:
pt_features.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,insomnia,insomnia_no_hypnotics,stroke,heart_failure,mental_illness,sleep_apnoea,chronic_pulmonary_disease,epilepsy,hypnotics_100_pdds,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,hypnotic_pdds:00000,hypnotic_pdds:00001_10,hypnotic_pdds:00011_100,hypnotic_pdds:00101_1000,hypnotic_pdds:01001_10000,hypnotic_pdds:10000_and_above
16533,11452460.0,23.0,460.0,0.0,2011-01-28 00:00:00,1.0,6578.0,2013-04-24 00:00:00,1998-01-31 00:00:00,64370.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6853,1699205.0,20.0,205.0,1.0,2008-03-12 00:00:00,0.0,,2010-04-26 00:00:00,1997-01-31 00:00:00,61361.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
11277,19480120.0,21.0,120.0,1.0,2004-07-27 00:00:00,0.0,,2007-01-18 00:00:00,1993-01-31 00:00:00,24969.0,83.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
19656,1501356.0,27.0,356.0,1.0,2010-07-16 00:00:00,1.0,1917.0,2012-04-10 00:00:00,1996-01-31 00:00:00,49745.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
22483,3892099.0,21.0,99.0,1.0,2004-02-17 00:00:00,1.0,6578.0,2011-12-13 00:00:00,1994-01-31 00:00:00,13030.0,83.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,21.058408,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [51]:
columns_for_inclusion = [
    'age_at_index_date',
    'female',
    'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea', 
    'chronic_pulmonary_disease', 
    'hypnotics_100_pdds',
    'insomnia'
    
]

## Insomnia consultations

### insomnia yes/no

In [52]:
# workaround for deprecated chisqprob function in statsmodels
stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df) 

In [53]:
training_cols = [col for col in pt_features.columns if col in columns_for_inclusion]
summary_table,univariate_and_multivariate_results_formatted = calculate_univariate_and_multivariate_ORs(pt_features,covariates=training_cols,main_variables=columns_for_inclusion)

female  being retained as mean > 0
age_at_index_date  being retained as mean > 0
insomnia  being retained as mean > 0
stroke  being retained as mean > 0
heart_failure  being retained as mean > 0
mental_illness  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
hypnotics_100_pdds  being retained as mean > 0




In [54]:
univariate_and_multivariate_results_formatted

Unnamed: 0,Univariate OR,Multivariate OR
Age at index date,"1.00, (1.00, 1.00), P=1.000","1.00, (1.00, 1.00), P=1.000"
Chronic pulmonary disease,"1.39, (1.32, 1.47), P=0.000","1.42, (1.33, 1.51), P=0.000"
Female,"1.00, (0.97, 1.03), P=0.994","0.95, (0.90, 1.00), P=0.994"
Heart failure,"1.24, (1.11, 1.38), P=0.000","1.14, (1.02, 1.28), P=0.000"
Hypnotics (100 PDDs),"1.02, (1.01, 1.03), P=0.000","1.01, (1.00, 1.02), P=0.000"
Insomnia,"1.60, (1.44, 1.77), P=0.000","1.34, (1.20, 1.50), P=0.000"
Mental illness,"1.58, (1.51, 1.65), P=0.000","1.79, (1.70, 1.89), P=0.000"
Sleep apnoea,"1.35, (0.72, 2.53), P=0.345","0.95, (0.50, 1.81), P=0.345"
Stroke,"1.49, (1.38, 1.62), P=0.000","1.47, (1.35, 1.61), P=0.000"


In [55]:
summary_table

0,1,2,3
Dep. Variable:,isCase,No. Observations:,25759.0
Model:,Logit,Df Residuals:,25750.0
Method:,MLE,Df Model:,8.0
Date:,"Thu, 31 May 2018",Pseudo R-squ.:,0.02296
Time:,17:51:11,Log-Likelihood:,-17445.0
converged:,True,LL-Null:,-17855.0
,,LLR p-value:,1.0349999999999999e-171
