In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
# Widen the notebook's `.container` element to 95% of the page width.
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [4]:
import os
import sys
from pprint import pprint

import pandas as pd
import numpy as np
from datetime import date, timedelta
import re

# Append the parent of the current working directory to sys.path, once,
# so packages living one level up can be imported by the lines that follow.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import demres
from demres.definitions import ROOT_DIR
from demres.common.constants import entry_type
from demres.common import codelists
from demres.common.process_raw_data import *
from demres.dempred.constants import Study_Design
from dempred.functions import *

In [5]:
# Load the prescription table from HDF (the path is relative to the working directory).
relev_prescriptions = pd.read_hdf('hdf/relev_prescriptions.hdf')

In [35]:
# Attach each prescription's strength and substance name from the product dictionary.
# The dictionary is read here because this cell sits above the cell that normally
# loads `pegprod`; without it a fresh-kernel run stops with a NameError.
pegprod = pd.read_csv('data/dicts/proc_pegasus_prod.csv')
temp = pd.merge(relev_prescriptions,pegprod[['prodcode','substance strength','drug substance name']],how='left')

In [None]:
# Keep only the rows whose substance name contains one of the listed substances
# (case-insensitive; rows with a missing name are excluded via na=False).
# Each substance is listed once: the earlier single-string pattern repeated
# CINNARIZINE and CHLORDIAZEPOXIDE, which has no effect on matching.
sedative_name_fragments = [
    'ALPRAZOLAM', 'CHLORDIAZEPOXIDE', 'CLOBAZAM', 'CLONAZEPAM', 'FLURAZEPAM',
    'LOPRAZOLAM', 'LORAZEPAM', 'LORMETAZEPAM', 'MIDAZOLAM', 'NITRAZEPAM',
    'OXAZEPAM', 'TEMAZEPAM', 'CHLORAL', 'CLOMETHIAZOLE', 'DEXMEDETOMIDINE',
    'MELATONIN', 'MEPROBAMATE', 'ZALEPLON', 'zopiclone', 'ZOLPIDEM',
    'ALIMEMAZINE', 'ANTAZOLINE', 'CHLORPHENAMINE', 'CINNARIZINE', 'CLEMASTINE',
    'CYPROHEPTADINE', 'ERGOTAMINE', 'HYDROXYZINE', 'KETOTIFEN', 'MORPHINE',
    'PARACETAMOL', 'PIZOTIFEN', 'PROMETHAZINE',
]
temp2 = temp[temp['drug substance name'].str.contains('|'.join(sedative_name_fragments),case=False,na=False,regex=True)]

In [None]:
# Distinct substance names present in temp2.
set(temp2['drug substance name'])

In [6]:
# Discard the relatively small number of prescriptions whose quantity is NaN.
relev_prescriptions = relev_prescriptions.loc[pd.notnull(relev_prescriptions['qty'])]

In [7]:
# Look up product codes for the drug names in codelists.sedatives.
# NOTE(review): the helper comes from one of the star imports; presumably it
# returns a flat list-like of prodcodes - confirm.
sedative_prodcodes = get_prodcodes_from_drug_name(codelists.sedatives)

In [8]:
# Product dictionary; its 'prodcode', 'substance strength' and
# 'drug substance name' columns are merged onto the prescriptions below.
pegprod = pd.read_csv('data/dicts/proc_pegasus_prod.csv')

In [9]:
# Restrict to prescriptions whose product code is one of the sedative product codes.
# The codes are handed to isin() directly: wrapping them in another list makes
# isin() receive a single nested element rather than the individual codes.
# NOTE(review): assumes sedative_prodcodes is a flat list-like - confirm.
specific_prescriptions = relev_prescriptions.loc[relev_prescriptions['prodcode'].isin(sedative_prodcodes)]

In [10]:
# Eyeball three randomly chosen rows (no random_state is given, so the rows differ between runs).
specific_prescriptions.sample(3)

Unnamed: 0,patid,eventdate,sysdate,prodcode,qty,ndd,numdays,numpacks,packtype,issueseq,type,index_date,exposure_start_date,exposure_end_date
2416211,16097023,2000-08-07,2000-08-07,35,28.0,1.0,0,0.0,1,1,5,2009-01-09,1999-01-12,2009-01-09
31742354,1589330,1996-07-02,2000-03-22,20,84.0,1.0,0,0.0,0,0,5,2005-07-01,1995-07-04,2005-07-01
29117346,1197301,2001-07-24,2001-07-24,35,28.0,1.0,0,0.0,1,1,5,2007-12-13,1997-12-15,2007-12-13


In [11]:
# Left-join strength and substance name from the product dictionary onto the
# selected prescriptions; the join key is whatever columns the two frames share.
product_details = pegprod[['prodcode','substance strength','drug substance name']]
specific_prescs_plus_pegprod = pd.merge(
    left=specific_prescriptions,
    right=product_details,
    how='left',
)

In [12]:
# Split the strength text (e.g. '192mg') into a numeric part and a unit part.
# The pattern is a raw string because '\.' and '\w' are regex escapes, not
# Python string escapes; the regex itself is unchanged.
amount_and_unit = specific_prescs_plus_pegprod['substance strength'].str.extract(r'([0-9\.]+)(\w+)',expand=True)

In [13]:
# Name the two capture groups: the numeric part and the unit part.
amount_and_unit.columns=['amount','unit']

In [14]:
# The extracted amounts are strings; convert them to floats for arithmetic.
amount_and_unit['amount'] = amount_and_unit['amount'].astype(float)

In [15]:
# Put the parsed amount/unit columns alongside the prescriptions and drop the
# columns that are not used afterwards.
# Not idempotent: a second run would add the columns again and fail on the drop.
unused_columns = ['numpacks','numdays','packtype','issueseq','type']
with_parsed_strength = pd.concat([specific_prescs_plus_pegprod,amount_and_unit],axis=1)
specific_prescs_plus_pegprod = with_parsed_strength.drop(unused_columns,axis=1)

In [16]:
# Debugging aid (left disabled): list the rows for which no unit could be parsed.
# specific_prescs_plus_pegprod[pd.isnull(specific_prescs_plus_pegprod['unit'])]

In [17]:
# Distinct units found before any conversion.
set(specific_prescs_plus_pegprod['unit'])

{'mg', 'microgram', 'micrograms'}

In [18]:
# Express every strength in milligrams: scale the amount by the mg-per-unit
# factor for its current unit, then relabel the unit as 'mg'.
for source_unit, mg_factor in (('nanogram',0.000001),('microgram',0.001),('micrograms',0.001),('gram',1000)):
    rows_in_unit = specific_prescs_plus_pegprod['unit']==source_unit
    specific_prescs_plus_pegprod.loc[rows_in_unit,'amount'] = specific_prescs_plus_pegprod.loc[rows_in_unit,'amount']*mg_factor
    specific_prescs_plus_pegprod.loc[rows_in_unit,'unit']='mg'

In [19]:
# Distinct units after conversion; only 'mg' should remain.
set(specific_prescs_plus_pegprod['unit'])

{'mg'}

In [20]:
# Every strength must now be expressed in mg. The earlier form compared the
# set itself with the integer 1, which is never equal, so it could not fail.
remaining_units = set(specific_prescs_plus_pegprod['unit'])
assert remaining_units == {'mg'}, 'There is a unit present which is not a nanogram, mg, microgram or gram'

In [21]:
# Total milligrams per prescription = quantity issued x strength (already in mg).
quantity = specific_prescs_plus_pegprod['qty']
strength_mg = specific_prescs_plus_pegprod['amount']
specific_prescs_plus_pegprod['total_in_mg'] = quantity*strength_mg

In [22]:
# Shorter name for the same DataFrame object (an alias; no copy is made).
prescs = specific_prescs_plus_pegprod

In [23]:
# Spot check: sum of total_in_mg over all rows for patient 1166.
prescs.loc[prescs['patid']==1166,'total_in_mg'].sum()

560.0

In [24]:
# Inspect the Clomethiazole prescriptions, ordered by ascending quantity.
is_clomethiazole = prescs['drug substance name']=='Clomethiazole'
prescs[is_clomethiazole].sort_values(by='qty')

Unnamed: 0,patid,eventdate,sysdate,prodcode,qty,ndd,index_date,exposure_start_date,exposure_end_date,substance strength,drug substance name,amount,unit,total_in_mg
193693,5946236,1997-11-20,1997-11-20,563,1.0,1.5,2001-05-10,1991-05-13,2001-05-10,192mg,Clomethiazole,192.0,mg,192.0
128493,464163,1996-03-25,1996-03-25,563,1.0,1.0,2002-02-25,1992-02-28,2002-02-25,192mg,Clomethiazole,192.0,mg,192.0
128492,464163,1996-02-15,1996-02-15,563,1.0,1.0,2002-02-25,1992-02-28,2002-02-25,192mg,Clomethiazole,192.0,mg,192.0
128491,464163,1995-11-30,1995-11-30,563,1.0,1.0,2002-02-25,1992-02-28,2002-02-25,192mg,Clomethiazole,192.0,mg,192.0
103482,386128,2004-07-08,2004-07-08,563,7.0,1.0,2006-06-26,1996-06-28,2006-06-26,192mg,Clomethiazole,192.0,mg,1344.0
15572,706017,2007-11-03,2007-11-02,563,7.0,1.0,2008-05-22,1998-05-25,2008-05-22,192mg,Clomethiazole,192.0,mg,1344.0
15574,706017,2007-11-05,2007-11-02,563,7.0,1.0,2008-05-22,1998-05-25,2008-05-22,192mg,Clomethiazole,192.0,mg,1344.0
15575,706017,2007-12-17,2007-12-17,563,7.0,1.0,2008-05-22,1998-05-25,2008-05-22,192mg,Clomethiazole,192.0,mg,1344.0
15576,706017,2007-12-18,2007-12-17,563,7.0,1.0,2008-05-22,1998-05-25,2008-05-22,192mg,Clomethiazole,192.0,mg,1344.0
15577,706017,2007-12-19,2007-12-17,563,7.0,1.0,2008-05-22,1998-05-25,2008-05-22,192mg,Clomethiazole,192.0,mg,1344.0


In [25]:
# Will hold one 'pdd' value per sedative name.
# NOTE(review): PDD presumably stands for prescribed daily dose - confirm.
pdds = {}

In [26]:
# For each sedative name, average the per-item strength (mg) over all matching
# prescriptions, weighting each prescription by its quantity, and store the
# result in `pdds`. Names with no matching prescription are skipped.
for sedative in codelists.sedatives:
    # na=False: a missing substance name counts as "no match" instead of
    # producing a NaN in the mask, which boolean indexing rejects.
    name_matches = prescs['drug substance name'].str.contains(sedative,case=False,na=False)
    relev_prescs = prescs[name_matches]
    if len(relev_prescs)>0:
        drug_amounts = list(relev_prescs['amount'])
        drug_weights = list(relev_prescs['qty'])
        pdd = np.average(drug_amounts,weights=drug_weights)
        # Validate before storing so a null value never lands in the dict.
        assert pd.notnull(pdd), 'PDD for ' + sedative + ' is null'
        pdds[sedative]=pdd
#         if sedative=='CLOMETHIAZOLE':
#             print(pdd)
#             pprint(set(drug_amounts))
#             pprint(set(drug_weights))

In [27]:
# Show the per-sedative values collected in pdds.
pprint(pdds)

{'ALIMEMAZINE TARTRATE': 7.8679974300858424,
 'ALPRAZOLAM': 0.28226108097998875,
 'CHLORAL HYDRATE': 70.082596047146325,
 'CHLORDIAZEPOXIDE': 7.1031620353068607,
 'CHLORDIAZEPOXIDE HYDROCHLORIDE': 7.1031620353068607,
 'CHLORPHENAMINE MALEATE': 3.6941520837001915,
 'CINNARIZINE': 15.958033915756115,
 'CLOBAZAM': 10.0,
 'CLOMETHIAZOLE': 192.0,
 'CLONAZEPAM': 0.61942279204288941,
 'CYPROHEPTADINE HYDROCHLORIDE': 3.9950691903928743,
 'LORAZEPAM': 1.0535258198932231,
 'LORMETAZEPAM': 0.79144063842677381,
 'MELATONIN': 2.8246887966804981,
 'MEPROBAMATE': 371.63781430630576,
 'NITRAZEPAM': 4.9864648445731401,
 'OXAZEPAM': 13.136246054265952,
 'PROMETHAZINE TEOCLATE': 25.0,
 'TEMAZEPAM': 11.563754954323725,
 'ZALEPLON': 5.7523424582031968,
 'zopiclone': 5.8976915636799827}


In [28]:
# Manual cross-check for Lorazepam: per-item strengths of its prescriptions (exact name match).
drug_amounts = list(prescs.loc[prescs['drug substance name']=='Lorazepam','amount'])

In [29]:
# Manual cross-check for Lorazepam: quantities of the same prescriptions, used as weights.
drug_weights = list(prescs.loc[prescs['drug substance name']=='Lorazepam','qty'])

In [30]:
# Quantity-weighted mean of the strengths held in drug_amounts / drug_weights.
pdd = np.average(drug_amounts,weights=drug_weights)

In [31]:
# Display the weighted mean computed in the previous cell.
pdd

1.0535258198932231

In [32]:
# Total milligrams per patient and substance, as a flat table with one row
# per (patid, drug substance name) pair. Despite the name, this holds sums, not counts.
mg_by_patient_and_drug = prescs.groupby(['patid','drug substance name'])['total_in_mg']
presc_count = mg_by_patient_and_drug.sum().reset_index()

In [33]:
# Display the per-patient, per-substance totals.
presc_count

Unnamed: 0,patid,drug substance name,total_in_mg
0,1166,Temazepam,560.0
1,4124,Cinnarizine,225.0
2,4289,Clobazam,600.0
3,6317,Cinnarizine,1260.0
4,7301,Chlorphenamine maleate,112.0
5,7301,Temazepam,840.0
6,7301,Zopiclone,315.0
7,7330,Chlorphenamine maleate,704.0
8,7330,Clobazam,300.0
9,11290,Temazepam,280.0
