In [65]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
cd ~/demres

/Users/zurfarosa/demres


In [67]:
import os
import sys

# Put the parent of the current working directory on the import path
# (only once, so re-running this cell does not grow sys.path).
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.common import codelists,druglists
from demres.common.process_pt_features import *
from demres.common.process_entries import *
from demres.demins.constants import Study_Design as sd
from demres.demins.statistical_functions import *
from common.helper_functions import *
from pprint import pprint

In [68]:
pd.set_option('display.max_columns', None)

## Process raw CSV files

In [5]:
# create_pegmed()

In [6]:
# create_pegprod()

In [7]:
# create_prescriptions()

In [8]:
# create_consultations()

In [9]:
# create_clinicals()

In [10]:
# create_tests()

In [11]:
# create_referrals()

In [12]:
# create_immunisations()

In [13]:
# create_medcoded_entries()

## Create basic pt_features dataframe
*pt_features will contain all the variables (e.g. age, female gender, insomnia) used in the logistic regression*

In [69]:
# Dementia subtype to analyse - 'vascular', 'alzheimers' or 'all_dementia'.
# The value is concatenated into every pt_features CSV path used below, so it
# is validated here: a typo would otherwise silently target a wrong or missing file.
VALID_SUBTYPES = ('vascular', 'alzheimers', 'all_dementia')
subtype = 'all_dementia'
if subtype not in VALID_SUBTYPES:
    raise ValueError('subtype must be one of %r, got %r' % (VALID_SUBTYPES, subtype))

In [72]:
# Result of the project helper get_all_encounters(); passed below to
# get_all_entries() and get_consultation_count().
# NOTE(review): presumably every consultation-type record for the cohort - confirm in the helper.
all_encounters = get_all_encounters()

In [16]:
# Built from all_encounters by the project helper get_all_entries(); used in the
# Sandbox section as the source for get_patient_history().
all_entries = get_all_entries(all_encounters)

In [17]:
# pt_features = create_pt_features()

In [18]:
# pt_features = get_index_date_and_caseness_and_add_final_dementia_subtype(all_entries,pt_features)

In [19]:
# pt_features = only_include_specific_dementia_subtype(pt_features,subtype=subtype) #does nothing if we're interested in all dementia

In [20]:
# pt_features = add_data_start_and_end_dates(all_encounters,pt_features,subtype)

In [209]:
# Match cases to controls separately for each exposure window and save one
# matched cohort CSV per window.
#
# The unmatched cohort file is identical for every window, so it is parsed once
# here rather than once per iteration. Each window is handed its own copy, so
# anything match_cases_and_controls does to its input cannot carry over into
# the next window's matching.
pt_features_path_stem = 'data/pt_data/processed_data/pt_features_demins_'+subtype
unmatched_pt_features = pd.read_csv(pt_features_path_stem+'.csv',delimiter=',',parse_dates=['index_date','data_end','data_start'],infer_datetime_format=True)

for window in sd.exposure_windows:
    print(window['name'],' being matched')
    pt_features = match_cases_and_controls(unmatched_pt_features.copy(),window)
    pt_features.to_csv(pt_features_path_stem+'_'+ window['name'] +'.csv',index=False)

12_to_7  being matched
All patients 94500
Number of suitable cases 12216
Number of controls 47020
75714  patients being removed as unmatchable
10_to_5  being matched
All patients 94500
Number of suitable cases 18849
Number of controls 47020
64082  patients being removed as unmatchable
8_to_3  being matched
All patients 94500
Number of suitable cases 26042
Number of controls 47020
50622  patients being removed as unmatchable


## Add derived variables to pt_features 
*e.g. insomnia count, presence of diabetes, consultation count*

In [210]:
# Load the previously processed medcoded entries from HDF; passed to
# get_multiple_condition_statuses() when deriving condition variables below.
medcoded_entries = pd.read_hdf('data/pt_data/processed_data/hdf/medcoded_entries.hdf')

In [211]:
# Load the previously processed prescriptions from HDF; passed to
# create_pdd_for_each_drug() and create_PDD_columns_for_each_pt() below.
prescriptions = pd.read_hdf('data/pt_data/processed_data/hdf/prescriptions.hdf')

In [213]:
# Load the matched cohort saved for the second exposure window, then run
# create_pdd_for_each_drug over all drug lists for that same window.
pdd_window = sd.exposure_windows[1]
pdd_cohort_csv = 'data/pt_data/processed_data/pt_features_demins_'+subtype+'_'+pdd_window['name']+'.csv'
pt_features = pd.read_csv(
    pdd_cohort_csv,
    delimiter=',',
    parse_dates=['index_date','data_end','data_start'],
    infer_datetime_format=True,
)
create_pdd_for_each_drug(prescriptions,druglists.all_druglists,pt_features,pdd_window)

CARBAMAZEPINE 	pdd: 329.571934268
GABAPENTIN 	pdd: 711.545805055
LAMOTRIGINE 	pdd: 187.326325773
PREGABALIN 	pdd: 169.484271346
SODIUM VALPROATE 	pdd: 721.028033855
VALPROIC ACID 	No prescriptions found
LITHIUM CITRATE 	pdd: 891.292307692
LITHIUM CARBONATE 	pdd: 464.293337538
VALPROATE SEMISODIUM 	pdd: 496.21653085
OXCARBAZEPINE 	pdd: 573.913043478
ALPRAZOLAM 	pdd: 0.239361702128
CHLORDIAZEPOXIDE HYDROCHLORIDE 	pdd: 10.8586489863
CLOBAZAM 	pdd: 11.7033750594
CLONAZEPAM 	pdd: 0.823051344315
DIAZEPAM 	pdd: 4.66742792872
FLUNITRAZEPAM 	pdd: 1.0
FLURAZEPAM 	No prescriptions found
FLURAZEPAM HYDROCHLORIDE 	pdd: 27.2297441592
LOPRAZOLAM 	No prescriptions found
LORAZEPAM 	pdd: 1.46318357973
LORMETAZEPAM 	pdd: 0.865905116329
MIDAZOLAM 	No prescriptions found
MIDAZOLAM HYDROCHLORIDE 	No prescriptions found
NITRAZEPAM 	pdd: 5.97603790155
OXAZEPAM 	pdd: 19.0568681231
TEMAZEPAM 	pdd: 14.2202967324
ZALEPLON 	pdd: 6.97840172786
ZOPICLONE 	pdd: 6.57257823592
ZOLPIDEM TARTRATE 	pdd: 7.23277765645
ZOLP

In [217]:
# Switch off pandas' chained-assignment check for the rest of the session
# (pandas' default for this option is 'warn').
pd.set_option('mode.chained_assignment', None)

# For the second and third exposure windows: reload that window's matched
# cohort, run the derived-variable helpers in the order below, and write the
# result back over the same CSV it was read from.
date_columns = ['index_date','data_end','data_start']
for window in (sd.exposure_windows[1], sd.exposure_windows[2]):
    print(window['name'],'...')
    window_csv = 'data/pt_data/processed_data/pt_features_demins_'+subtype+'_'+ window['name'] +'.csv'
    pt_features = pd.read_csv(window_csv,delimiter=',',parse_dates=date_columns,infer_datetime_format=True)
    pt_features = get_multiple_condition_statuses(pt_features,medcoded_entries,window,codelists.all_codelists)
    pt_features = create_PDD_columns_for_each_pt(pt_features,window,druglists.all_druglists,prescriptions)
    pt_features = get_consultation_count(pt_features,all_encounters,window)
    pt_features = create_quantiles_and_booleans(pt_features)
    pt_features.to_csv(window_csv,index=False)

10_to_5 ...
insomnia
	Total insomnia events in all medcoded_events dataframe: 36951
	insomnia is being measured only during the window period
	insomnia events in this window for our patients: 1829
	Unique values   {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 29, 36, 38, 46, 53}
stroke
	Total stroke events in all medcoded_events dataframe: 48913
	stroke events in this window for our patients: 2782
	Unique values   {0, 1}
non_stroke_vascular_disease
	Total non_stroke_vascular_disease events in all medcoded_events dataframe: 147199
	non_stroke_vascular_disease events in this window for our patients: 7978
	Unique values   {0, 1}
hypertension
	Total hypertension events in all medcoded_events dataframe: 175874
	hypertension events in this window for our patients: 13823
	Unique values   {0, 1}
diabetes
	Total diabetes events in all medcoded_events dataframe: 57533
	diabetes events in this window for our patients: 3004
	Unique values   {0, 1}
mental_illness_non_smi
	T

KeyboardInterrupt: 

# Sandbox

In [28]:
# Let pandas display up to 10000 rows before truncating a frame.
pd.set_option('display.max_rows', 10000)

In [29]:
patid = 2274541

In [55]:
# Whatever get_patient_history() returns for this patid from all_entries,
# ordered by the 'eventdate' column.
hist = get_patient_history(all_entries,patid).sort_values(by='eventdate')

In [None]:
hist.sort_values(by='eventdate')

In [105]:
len(pt_features)

30844

In [106]:
pt_features.sample(15)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,earliest_sysdate,sysdate_of_final_converted_code,start_of_year_after_earliest_year_with_>15_consultations,data_start,matchid,age_at_index_date,insomnia,stroke,non_stroke_vascular_disease,hypertension,diabetes,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,chronic_pulmonary_disease,epilepsy
14798,9653379,23,379,1,2008-01-04,False,,2013-03-12,1996-03-05,,1996-01-01,1996-01-01,67393,85,0,0,1,1,0,0,0,0,0,0
16394,747344,18,344,0,2008-09-05,True,6578.0,2009-05-22,1996-03-23,,1992-01-01,1992-01-01,51814,90,0,0,0,0,0,0,0,0,1,0
25789,6647120,23,120,1,2007-10-03,True,,2010-12-01,1995-12-12,,1991-01-01,1991-01-01,16844,84,0,0,0,1,0,1,0,0,1,0
17494,2285323,23,323,0,2011-01-19,True,1350.0,2012-09-19,1997-01-26,,1995-01-01,1995-01-01,49351,88,0,0,0,1,0,0,0,0,0,0
9024,19728347,25,347,1,2004-02-10,False,,2008-01-04,2002-05-21,,1992-01-01,1992-01-01,31796,79,0,0,0,1,0,0,0,0,0,0
6990,31411088,19,88,1,2003-01-07,False,,2006-06-22,1996-06-08,,1991-01-01,1991-01-01,64361,84,1,0,0,0,0,0,0,0,1,0
16083,15520227,26,227,0,2007-12-19,False,,2013-02-11,1995-10-05,,2000-01-01,1995-10-05,53173,81,0,0,0,0,0,0,0,0,1,0
19519,1835320,22,320,1,2010-06-08,False,,2013-04-15,2000-06-28,,1995-01-01,1995-01-01,6032,88,0,1,0,1,0,0,0,0,0,0
19846,5895225,18,225,1,2008-05-29,False,,2013-04-25,1996-06-20,,1995-01-01,1995-01-01,61039,90,0,0,1,1,0,0,0,0,0,0
5599,3545115,34,115,0,2008-04-18,False,,2011-05-25,1996-03-03,,2010-01-01,1996-03-03,6334,74,0,0,0,0,0,0,0,0,0,0


In [64]:
# Load the processed Pegasus medical dictionary CSV.
# NOTE(review): pegmed is not referenced by any later cell visible in this notebook.
pegmed = pd.read_csv('data/dicts/proc_pegasus_medical.csv')

In [80]:
# NOTE: despite the "_readcodes" suffix, this holds the *medcodes* returned by
# get_medcodes_from_readcodes() for codelists.dlb_and_pdd; it is compared
# against the 'final dementia medcode' column below.
dlb_and_pdd_readcodes = get_medcodes_from_readcodes(codelists.dlb_and_pdd)

In [81]:
dlb_and_pdd_readcodes

[26270, 7572, 9509]

In [92]:
# Boolean mask: True where the patient's own 'final dementia medcode' is one of
# the DLB/PDD medcodes.
# NOTE(review): in the pt_features samples displayed in this notebook, controls
# (isCase == False) have an empty 'final dementia medcode', so this mask looks
# like it can only flag cases, not their matched controls, despite the
# "_cases_and_controls" name - confirm whether matched controls should be
# dropped as well.
dlb_and_pdd_cases_and_controls = pt_features['final dementia medcode'].isin(dlb_and_pdd_readcodes)

In [94]:
# Keep only the rows that the DLB/PDD mask did not flag.
rows_to_keep = ~dlb_and_pdd_cases_and_controls
pt_features_without_dlb_pdd_cases_and_controls = pt_features.loc[rows_to_keep]

In [96]:
pt_features_without_dlb_pdd_cases_and_controls.sample(5)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,age_at_index_date,insomnia,stroke,non_stroke_vascular_disease,hypertension,diabetes,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,chronic_pulmonary_disease,epilepsy,non_insomnia_GP_consultations,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,other_sedatives_100_pdds,antipsychotics_100_pdds,depot_antipsychotics_100_pdds,antidepressants_100_pdds,benzo_and_z_drugs_any,insomnia_any,insomnia_count:0,insomnia_count:1_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antidepressant_pdds:10000_and_above.1,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,antipsychotic_pdds:10000_and_above.1,depot_antipsychotic_pdds:00000,depot_antipsychotic_pdds:00001_10,depot_antipsychotic_pdds:00011_100,depot_antipsychotic_pdds:00101_1000,depot_antipsychotic_pdds:01001_10000,depot_antipsychotic_pdds:10000_and_above,depot_antipsychotic_pdds:10000_and_above.1,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,other_sedative_pdds:10000_and_above.1,benzo_and_z_drug_pdds:00000,benzo_and_z_drug_pdds:00001_10,benzo_and_z_drug_pdds:00011_100,benzo_and_z_drug_pdds:00101_1000,benzo_and_z_drug_pdds:01001_10000,benzo_and_z_drug_pdds:10000_and_abo
ve,benzo_and_z_drug_pdds:10000_and_above.1,mood_stabiliser_pdds:00000,mood_stabiliser_pdds:00001_10,mood_stabiliser_pdds:00011_100,mood_stabiliser_pdds:00101_1000,mood_stabiliser_pdds:01001_10000,mood_stabiliser_pdds:10000_and_above,mood_stabiliser_pdds:10000_and_above.1,insomnia_count:above_5
23999,29514465,21,465,0,2005-12-12,True,1916.0,2007-09-25,1991-01-01,69646,84,0,1,0,1,1,0,0,0,0,0,90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0
7921,733111,28,111,0,2010-06-23,False,,2013-05-15,1999-12-07,20030,82,0,0,0,0,0,0,0,0,0,0,79,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0
34988,9240074,18,74,1,2001-06-21,False,,2009-03-24,1990-01-01,51320,83,0,1,1,1,0,1,0,0,1,0,98,0.273862,0.211858,0.0,0.0,0.0,0.572113,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0
40675,5363606,29,606,1,2010-04-09,True,4693.0,2013-05-07,1992-01-01,85521,81,0,0,1,1,0,1,1,0,0,0,371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0
27337,3737370,39,370,0,2010-12-11,False,,2013-04-30,1996-01-01,22738,71,0,1,1,0,0,0,0,0,0,0,78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0


In [98]:
# Save the filtered cohort alongside the second exposure window's file,
# distinguished by a '_non_dlb_and_pdd' suffix; the index is not written.
non_dlb_pdd_csv = (
    'data/pt_data/processed_data/pt_features_demins_'
    + subtype
    + '_'
    + sd.exposure_windows[1]['name']
    + '_non_dlb_and_pdd.csv'
)
pt_features_without_dlb_pdd_cases_and_controls.to_csv(non_dlb_pdd_csv,index=False)