In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
cd ~/demres

/Users/zurfarosa/demres


In [21]:
import os
import sys

# Put the parent of the current working directory on the import path (once only).
# Presumably this is what lets `import demres` resolve — confirm against the repo layout.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [22]:
# Display settings: no cap on the number of columns shown, and up to 500 rows per frame
pd.options.display.max_columns = None
pd.set_option('display.max_rows', 500)

In [23]:
# Dementia subtype selector; options: 'alzheimers', 'vascular', 'all_dementia'
subtype = "all_dementia"

In [24]:
# Window label (part of the feature file name); options: '12_to_7', '10_to_5', '8_to_3'
window = "10_to_5"

In [25]:
# pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_'+subtype+'_'+window+'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True )

In [26]:
# Load the processed feature table for the chosen subtype and window.
# The path is relative, so it depends on the current working directory.
# `infer_datetime_format` is no longer passed: it is deprecated since pandas 2.0
# (it only affected parsing speed), and `delimiter=','` is already the default.
pt_features_path = os.path.join(
    'data', 'pt_data', 'processed_data',
    'pt_features_demins_' + subtype + '_' + window + '.csv',
)
pt_features = pd.read_csv(
    pt_features_path,
    parse_dates=['index_date', 'data_end', 'data_start'],
)



In [27]:
# Number of rows whose isCase flag is True
len(pt_features.loc[pt_features['isCase'].eq(True)])

15209

In [28]:
# Preview five random rows; random_state is fixed so a re-run shows the same rows.
# NOTE(review): the rendered output holds patient-level records — consider clearing it before sharing.
pt_features.sample(5, random_state=0)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,insomnia,insomnia_no_hypnotics,stroke,heart_failure,mental_illness,sleep_apnoea,chronic_pulmonary_disease,epilepsy,hypnotics_100_pdds,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,hypnotic_pdds:00000,hypnotic_pdds:00001_10,hypnotic_pdds:00011_100,hypnotic_pdds:00101_1000,hypnotic_pdds:01001_10000,hypnotic_pdds:10000_and_above
16015,6970416,23,416,1,2006-07-11,False,,2008-11-06,1994-01-31,45732,83,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
26927,9682332,30,332,1,2009-04-01,True,8634.0,2009-12-22,1991-10-31,50469,79,0,0,0,0,1,0,1,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
26572,1001473,38,473,1,2009-01-13,True,11175.0,2010-12-09,1993-01-31,71218,71,0,0,0,0,1,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
19379,16164306,24,306,1,2000-05-17,False,,2005-08-09,1990-01-31,26872,76,0,0,0,0,1,0,1,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
28782,5545325,17,325,0,2004-11-10,True,6578.0,2011-11-02,1992-01-31,49679,87,1,0,0,1,0,0,0,0,0.257101,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [29]:
# Show the column labels of the loaded feature table
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'age_at_index_date', 'insomnia', 'insomnia_no_hypnotics', 'stroke',
       'heart_failure', 'mental_illness', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy', 'hypnotics_100_pdds',
       'age_at_index_date:65-69', 'age_at_index_date:70-74',
       'age_at_index_date:75-79', 'age_at_index_date:80-84',
       'age_at_index_date:85-89', 'age_at_index_date:90-99',
       'age_at_index_date:above_99', 'hypnotic_pdds:00000',
       'hypnotic_pdds:00001_10', 'hypnotic_pdds:00011_100',
       'hypnotic_pdds:00101_1000', 'hypnotic_pdds:01001_10000',
       'hypnotic_pdds:10000_and_above'],
      dtype='object')

In [30]:
# characteristics = [column for column in pt_features.columns if column not in [
#     'patid',  'yob', 'pracid', 'index_date', 'isCase',
#     'final dementia medcode', 'data_start', 'data_end', 'matchid']]

In [32]:
# Columns passed to add_baseline_characteristics.
# Each column appears exactly once: 'hypnotics_100_pdds' was previously listed twice,
# and the second occurrence has been removed (first-occurrence order is kept).
columns_for_inclusion = [
    'age_at_index_date',
    'female',
    'stroke',
    'heart_failure',
    'mental_illness',
    'sleep_apnoea',
    'chronic_pulmonary_disease',
    'epilepsy',
    'hypnotics_100_pdds',
    'insomnia',
    # age bands
    'age_at_index_date:65-69', 'age_at_index_date:70-74',
    'age_at_index_date:75-79', 'age_at_index_date:80-84',
    'age_at_index_date:85-89', 'age_at_index_date:90-99',
    'age_at_index_date:above_99',
    # hypnotic_pdds bands
    'hypnotic_pdds:00000',
    'hypnotic_pdds:00001_10', 'hypnotic_pdds:00011_100',
    'hypnotic_pdds:00101_1000', 'hypnotic_pdds:01001_10000',
    'hypnotic_pdds:10000_and_above',
]

In [33]:
# Build the table that is later displayed via its 'Cases' and 'Controls' columns.
# add_baseline_characteristics is not defined in this notebook — presumably it comes
# from one of the demres star imports; confirm.
baselines = add_baseline_characteristics(
    columns_for_inclusion,
    pt_features,
)

In [34]:
# Row counts as a pair: (isCase is False, isCase is True)
(
    len(pt_features.loc[pt_features['isCase'].eq(False)]),
    len(pt_features.loc[pt_features['isCase'].eq(True)]),
)

(15209, 15209)

In [35]:
# Display only the 'Cases' and 'Controls' columns of the baseline table
baselines.loc[:, ['Cases', 'Controls']]

Unnamed: 0,Cases,Controls
female,10055 (66.1%),10055 (66.1%)
stroke,1662 (10.9%),1120 (7.4%)
heart_failure,913 (6.0%),726 (4.8%)
mental_illness,5695 (37.4%),3611 (23.7%)
sleep_apnoea,25 (0.2%),19 (0.1%)
chronic_pulmonary_disease,3449 (22.7%),2512 (16.5%)
epilepsy,288 (1.9%),165 (1.1%)
insomnia,2689 (17.7%),1956 (12.9%)
age_at_index_date:65-69,572 (3.8%),572 (3.8%)
age_at_index_date:70-74,1384 (9.1%),1384 (9.1%)
