In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

# Put the parent of the current working directory on the import path
# (only once), so packages that live one level up can be imported.
module_path = os.path.abspath(os.pardir)
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.common import codelists
from demres.common.helper_functions import *
from demres.common.process_pt_features import *
from demres.common.process_entries import *
from demres.demins.constants import Study_Design as sd
from demres.demins.statistical_functions import *
from common.helper_functions import *
from pprint import pprint
from IPython.display import display

  from pandas.core import datetools


In [4]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [5]:
# Show every row when a DataFrame is displayed (no row truncation).
pd.options.display.max_rows = None

## Process raw CSV files

In [6]:
# create_pegmed()

In [7]:
# create_pegprod()

In [8]:
# create_prescriptions()

In [9]:
# create_consultations()

In [10]:
# create_clinicals()

In [11]:
# create_tests()

In [12]:
# create_referrals()

In [13]:
# create_immunisations()

In [14]:
# create_medcoded_entries()

## Create processed CSV files to share with DEMINS teams
*convert the newly created HDF files to CSV (for use in other projects)*

In [15]:
# prescriptions = pd.read_hdf('data/pt_data/processed_data/hdf/prescriptions.hdf')
# prescriptions.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/prescriptions.csv',index=False)

In [16]:
# consultations = pd.read_hdf('data/pt_data/processed_data/hdf/consultations.hdf')
# consultations.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/consultations.csv',index=False)

In [17]:
# immunisations = pd.read_hdf('data/pt_data/processed_data/hdf/immunisations.hdf')
# immunisations.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/immunisations.csv',index=False)

In [18]:
# clinicals = pd.read_hdf('data/pt_data/processed_data/hdf/clinicals.hdf')
# clinicals.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/clinicals.csv',index=False)

In [19]:
# tests = pd.read_hdf('data/pt_data/processed_data/hdf/tests.hdf')
# tests.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/tests.csv',index=False)

In [20]:
# referrals = pd.read_hdf('data/pt_data/processed_data/hdf/referrals.hdf')
# referrals.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/referrals.csv',index=False)

In [21]:
# medcoded_entries = pd.read_hdf('data/pt_data/processed_data/hdf/medcoded_entries.hdf')
# medcoded_entries.to_csv('data/pt_data/processed_data/to_share_with_DEMINS/medcoded_entries.csv',index=False)

## Create basic pt_features dataframe
*pt_features will contain all the variables (e.g. age, female gender, insomnia) used in the logistic regression*

In [6]:
# Build the encounters table; it is passed to get_all_entries() and
# add_data_start_and_end_dates() in the cells below.
# NOTE(review): presumably assembled from the processed HDF files - confirm in the demres package.
all_encounters = get_all_encounters()

In [None]:
# Derive all_entries from the encounters table; it is the input used below
# when assigning index dates and caseness.
all_entries = get_all_entries(all_encounters)

In [None]:
# Create the initial pt_features table. Per the section note, this frame will
# hold all the variables used in the logistic regression.
pt_features = create_pt_features()

In [None]:
# Update pt_features using all_entries.
# NOTE(review): judging by the function name this adds the index date, the
# case/control flag and the final dementia subtype - confirm in the demres package.
pt_features = get_index_date_and_caseness_and_add_final_dementia_subtype(all_entries,pt_features)

In [None]:
# Preview the first five rows of pt_features at this stage.
pt_features.head()

In [None]:
# NOTE(review): presumably removes patients with particular dementia subtypes
# from pt_features - inferred from the function name only; confirm which
# subtypes in the demres package.
pt_features = avoid_specific_dementia_subtypes(pt_features) 

In [None]:
# Update pt_features using the encounters table.
# NOTE(review): presumably adds the 'data_start' and 'data_end' columns that
# later cells parse as dates - confirm in the demres package.
pt_features = add_data_start_and_end_dates(all_encounters,pt_features)

In [None]:
# pt_features.to_csv('data/pt_data/processed_data/pt_features_demins.csv',index=False)

In [None]:
# For each exposure window: reload the base pt_features CSV from disk, match
# cases and controls for that window, preview one row, and save the result to
# a window-specific CSV.
# NOTE(review): the input is read from disk, not taken from the cells above,
# and the only visible line that writes that file is commented out - make sure
# the file on disk is current before running this cell.
for window in sd.exposure_windows:
    print(window['name'],' being matched')
    pt_features = match_cases_and_controls(
        pd.read_csv(
            'data/pt_data/processed_data/pt_features_demins.csv',
            sep=',',
            parse_dates=['index_date','data_end','data_start'],
            infer_datetime_format=True,
        ),
        window,
    )
    display(pt_features.head(n=1))
    pt_features.to_csv(
        ''.join(['data/pt_data/processed_data/pt_features_demins_', window['name'], '.csv']),
        index=False,
    )

## Add derived variables to pt_features 
*e.g. insomnia count, history of stroke, consultation count*

In [7]:
# Load the processed medcoded entries from their HDF file; they are passed to
# get_multiple_condition_statuses() further down.
medcoded_entries = pd.read_hdf('data/pt_data/processed_data/hdf/medcoded_entries.hdf')

In [8]:
# Load the processed prescriptions from their HDF file; they feed the PDD and
# condition-status steps further down.
prescriptions = pd.read_hdf('data/pt_data/processed_data/hdf/prescriptions.hdf')

In [9]:
# Load the saved pt_features CSV for the exposure window at index 1 of
# sd.exposure_windows, parsing the three date columns as datetimes.
pt_features = pd.read_csv('data/pt_data/processed_data/pt_features_demins_'+sd.exposure_windows[1]['name']+'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

In [33]:
# Run the per-drug pdd calculation for the exposure window at index 1. The
# call prints one pdd line per drug (see the cell output); its return value,
# if any, is not kept.
# NOTE(review): presumably the results are persisted inside the function for
# later use - confirm in the demres package.
create_pdd_for_each_drug(prescriptions,pt_features,sd.exposure_windows[1])

['CLOMETHIAZOLE', 'DIAZEPAM', 'FLURAZEPAM', 'FLURAZEPAM HYDROCHLORIDE', 'LOPRAZOLAM', 'LOPRAZOLAM MESILATELORAZEPAM', 'LORMETAZEPAM', 'MELATONIN', 'NITRAZEPAM', 'OXAZEPAM', 'TEMAZEPAM', 'ZALEPLON', 'ZOLPIDEM TARTRATE', 'ZOLPIDEM ', 'ZOPICLONE']
CLOMETHIAZOLE 	pdd: 314.3554376657825
DIAZEPAM 	pdd: 4.661071466315938
FLURAZEPAM 	No prescriptions found
FLURAZEPAM HYDROCHLORIDE 	pdd: 29.39175572519084
LOPRAZOLAM 	No prescriptions found
LOPRAZOLAM MESILATELORAZEPAM 	No prescriptions found
LORMETAZEPAM 	pdd: 0.9058665784132888
MELATONIN 	pdd: 3.0
NITRAZEPAM 	pdd: 5.991664723363171
OXAZEPAM 	pdd: 20.785283646707338
TEMAZEPAM 	pdd: 14.246090863790956
ZALEPLON 	pdd: 7.073170731707317
ZOLPIDEM TARTRATE 	pdd: 7.135525904770405
ZOLPIDEM  	No prescriptions found
ZOPICLONE 	pdd: 6.4285431565793205


In [15]:
# For the exposure windows at index 0 and 2: load that window's pt_features
# CSV, add the condition statuses, the per-patient PDD columns and the
# quantile/boolean columns, preview five rows, and write the result back.
# NOTE(review): the output path is the same file that was read, so re-running
# this cell processes data that has already been augmented.
for window in (sd.exposure_windows[0], sd.exposure_windows[2]):
    print(window['name'],'...')
    window_csv = ''.join(['data/pt_data/processed_data/pt_features_demins_', window['name'], '.csv'])
    pt_features = (
        pd.read_csv(
            window_csv,
            sep=',',
            parse_dates=['index_date','data_end','data_start'],
            infer_datetime_format=True,
        )
        .pipe(get_multiple_condition_statuses, medcoded_entries, prescriptions, window, codelists.all_codelists)
        .pipe(create_PDD_columns_for_each_pt, window, [codelists.insomnia['medications']], prescriptions)
        .pipe(create_quantiles_and_booleans)
    )
    display(pt_features.head())
    pt_features.to_csv(window_csv, index=False)

12_to_7 ...
insomnia
insomnia_no_hypnotics
stroke
heart_failure
mental_illness
sleep_apnoea
chronic_pulmonary_disease
epilepsy
hypnotics
There are 2739 relevant prescription entries for hypnotics


Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,CLOMETHIAZOLE_100_pdds,DIAZEPAM_100_pdds,FLURAZEPAM_100_pdds,FLURAZEPAM HYDROCHLORIDE_100_pdds,LOPRAZOLAM_100_pdds,LOPRAZOLAM MESILATELORAZEPAM_100_pdds,LORMETAZEPAM_100_pdds,MELATONIN_100_pdds,NITRAZEPAM_100_pdds,OXAZEPAM_100_pdds,TEMAZEPAM_100_pdds,ZALEPLON_100_pdds,ZOLPIDEM TARTRATE_100_pdds,ZOLPIDEM _100_pdds,ZOPICLONE_100_pdds,insomnia,insomnia_no_hypnotics,stroke,heart_failure,mental_illness,sleep_apnoea,chronic_pulmonary_disease,epilepsy,hypnotics_100_pdds,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,hypnotic_pdds:00000,hypnotic_pdds:00001_10,hypnotic_pdds:00011_100,hypnotic_pdds:00101_1000,hypnotic_pdds:01001_10000,hypnotic_pdds:10000_and_above
0,4031018,34,18,1,2007-12-10,True,49513.0,2007-12-21,1995-12-05,2819,73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,1245425,27,425,1,2008-05-13,True,1350.0,2008-05-20,1996-04-27,58446,81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,8070027,24,27,1,2009-12-06,True,1916.0,2009-12-23,1997-11-28,4039,85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,5865223,10,223,1,2004-02-25,True,1916.0,2004-02-27,1992-01-31,31786,94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.025041,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,18.025041,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,9591471,30,471,0,2008-12-15,True,1350.0,2008-12-23,1996-11-22,66373,78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


8_to_3 ...
insomnia
insomnia_no_hypnotics
stroke
heart_failure
mental_illness
sleep_apnoea
chronic_pulmonary_disease
epilepsy
hypnotics
There are 5224 relevant prescription entries for hypnotics


Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,insomnia,insomnia_no_hypnotics,stroke,heart_failure,mental_illness,sleep_apnoea,chronic_pulmonary_disease,epilepsy,hypnotics_100_pdds,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,hypnotic_pdds:00000,hypnotic_pdds:00001_10,hypnotic_pdds:00011_100,hypnotic_pdds:00101_1000,hypnotic_pdds:01001_10000,hypnotic_pdds:10000_and_above
0,3952335,22,335,1,2002-10-01,True,1917.0,2002-10-21,1994-09-30,47221,80,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,4688370,22,370,1,2007-09-21,True,6578.0,2007-10-11,1999-09-15,51463,85,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,3636234,27,234,0,2007-12-17,True,6578.0,2008-01-04,1999-12-01,33483,80,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,465452,18,452,1,2006-03-08,True,1916.0,2006-03-08,1998-01-31,62534,88,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,5890114,34,114,0,2008-12-01,True,6578.0,2009-01-06,2000-11-28,14917,74,0,0,0,1,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
