In [1]:
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [2]:
# Move into the project directory so the relative CSV paths used by this
# notebook resolve. The location defaults to the original workstation path but
# can be overridden with the EHR_PROJECT_DIR environment variable, so the
# notebook also runs on machines where the repository lives elsewhere.
PROJECT_DIR = os.environ.get('EHR_PROJECT_DIR', 'C:/Users/Student/Documents/EHR---Team-6-Project')
os.chdir(PROJECT_DIR)

In [3]:
# Load the medication records and collect the distinct medication
# descriptions so they can be reviewed by eye.
medications = pd.read_csv('medications.csv')
meds = medications['DESCRIPTION'].unique()

# Hand-picked medication descriptions that this analysis treats as chemo.
# Matching is done on the exact string, so spelling and letter case matter
# (note that both 'PACLitaxel ...' and 'Paclitaxel ...' are listed).
chemo = [
    '1 ML DOCEtaxel 20 MG/ML Injection',
    '0.25 ML Leuprolide Acetate 30 MG/ML Prefilled Syringe',
    '10 ML oxaliplatin 5 MG/ML Injection',
    'Cisplatin 50 MG Injection',
    'PACLitaxel 100 MG Injection',
    '100 ML Epirubicin Hydrochloride 2 MG/ML Injection',
    'Tamoxifen 10 MG Oral Tablet',
    'palbociclib 100 MG Oral Capsule',
    'Paclitaxel 100 MG Injection',
    '10 ML Doxorubicin Hydrochloride 2 MG/ML Injection',
    'Etoposide 100 MG Injection',
    'Methotrexate 2.5 MG Oral Tablet',
    'ribociclib 200 MG Oral Tablet',
    'neratinib 40 MG Oral Tablet',
    'exemestane 25 MG Oral Tablet',
]


In [4]:
# Keep only the medication rows whose description is one of the chemo drugs
# listed in `chemo`. drop=True renumbers the rows from 0 without copying the
# old row labels into an extra 'index' column, which nothing downstream reads.
is_chemo = medications['DESCRIPTION'].isin(chemo)
chemo_pats = medications[is_chemo].reset_index(drop=True)

In [5]:
# Narrow the frame to the three columns used from here on. reset_index() is
# called without drop=True, so the previous row labels are kept in a new
# leading 'index' column.
chemo_pats = chemo_pats.loc[:, ['START', 'PATIENT', 'DESCRIPTION']].reset_index()

# Parse START, reduce it to its calendar date (discarding the time of day),
# and store it back as a datetime column.
start_dates = pd.to_datetime(chemo_pats['START']).dt.date
chemo_pats['START'] = pd.to_datetime(start_dates)

In [6]:
# Turn DESCRIPTION into the CHEMO predictor flag: 1 where a medication
# description is present, 0 where it is missing. The column is addressed by
# name rather than by position, so the flag cannot land on a different column
# if the column order ever changes, and it is stored with an integer dtype.
chemo_pats['DESCRIPTION'] = chemo_pats['DESCRIPTION'].notna().astype(int)

In [7]:
# Add PatMonth: the first day of the calendar month containing START.
# Roll each date forward to its month end (dates already on a month end stay
# put), then step back to the start of that same month.
start_day = chemo_pats['START'].dt.floor('d')
month_end = start_day + pd.offsets.MonthEnd(0)
chemo_pats['PatMonth'] = month_end - pd.offsets.MonthBegin(1)

In [8]:
# Remove the columns that are no longer needed: the raw start date and the
# leftover 'index' column of old row labels.
chemo_pats = chemo_pats.drop(columns=['START', 'index'])

In [9]:
# Keep only the first occurrence of each fully identical row.
repeated_rows = chemo_pats.duplicated()
chemo_pats = chemo_pats[~repeated_rows]

In [10]:
# Rename the DESCRIPTION column to CHEMO.
chemo_pats = chemo_pats.rename(columns={'DESCRIPTION': 'CHEMO'})

In [11]:
# Write the result to a CSV file in the current working directory, leaving
# out the row index.
output_csv = 'chemo_patients.csv'
chemo_pats.to_csv(output_csv, index=False)