In this file, patient conditions (diseases) are queried to find all patients with a history of cancer who also have cardiac issues (not assessed chronoclogically yet). I also compiled a list of chemotherapies from the medications.csv file and looked to see how many patients have been exposed to chemo.

In [1]:
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta

In [2]:
os.chdir('C:/Users/Student/Documents/EHR---Team-6-Project')

In [3]:
# Load the conditions.csv df into a list and determine unique values
conditions = pd.read_csv('conditions.csv')
diseases = conditions.DESCRIPTION.unique()
# diseases

In [4]:
# Create a list of cancer conditions
cancer_list = ['Neoplasm of prostate',
       'Carcinoma in situ of prostate (disorder)',
       'Malignant tumor of colon', 
       'Primary malignant neoplasm of colon',
       'Suspected lung cancer (situation)',
       'Non-small cell lung cancer (disorder)',
       'Non-small cell carcinoma of lung  TNM stage 1 (disorder)',
       'Malignant neoplasm of breast (disorder)',
       'Metastasis from malignant tumor of prostate (disorder)',
       'Overlapping malignant neoplasm of colon'
       'Small cell carcinoma of lung (disorder)',
       'Primary small cell malignant neoplasm of lung  TNM stage 1 (disorder)',
       'Secondary malignant neoplasm of colon']

# Filiter the conditions df to get all cardiac patients
cancer_patients = conditions[conditions['DESCRIPTION'].isin(cancer_list)].reset_index()
cancer_patients.columns


Index(['index', 'START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE',
       'DESCRIPTION'],
      dtype='object')

In [5]:
# Create a list of cardiac conditions
cardiac_list = ['Chronic congestive heart failure (disorder)', 
       'Cardiac Arrest', 'History of cardiac arrest (situation)',
       'Atrial Fibrillation', 
       'Coronary Heart Disease',
       'Myocardial Infarction',
       'History of myocardial infarction (situation)']

# Filiter the conditions df to get all cardiac patients
cardiac_patients = conditions[conditions['DESCRIPTION'].isin(cardiac_list)].reset_index()
cardiac_patients.columns

Index(['index', 'START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE',
       'DESCRIPTION'],
      dtype='object')

In [6]:
# Create a list of cancer patients that have the cardiac outcomes. 
combo_patients = cancer_patients[cancer_patients['PATIENT'].isin(cardiac_patients['PATIENT'])].reset_index()
unique_combo_patients =  combo_patients['PATIENT'].unique() # 297 unique patients with cancer and cardiac disease

In [7]:
# Read in medication data and review unique meds
medications = pd.read_csv('medications.csv')
meds = medications.DESCRIPTION.unique()

# Compile a list of chemo meds from the medications data
chemo = ['1 ML DOCEtaxel 20 MG/ML Injection', '0.25 ML Leuprolide Acetate 30 MG/ML Prefilled Syringe', 
         '10 ML oxaliplatin 5 MG/ML Injection', 'Cisplatin 50 MG Injection', 'PACLitaxel 100 MG Injection', 
         '100 ML Epirubicin Hydrochloride 2 MG/ML Injection', 'Tamoxifen 10 MG Oral Tablet', 'palbociclib 100 MG Oral Capsule', 
         'Paclitaxel 100 MG Injection', '10 ML Doxorubicin Hydrochloride 2 MG/ML Injection',
         'Etoposide 100 MG Injection', 'Methotrexate 2.5 MG Oral Tablet', 'ribociclib 200 MG Oral Tablet', 
         'neratinib 40 MG Oral Tablet', 'exemestane 25 MG Oral Tablet']


In [8]:
# Search for patient who have received chemo in the list above
chemo_pats = medications[medications['DESCRIPTION'].isin(chemo)].reset_index()
chemo_pats.PATIENT.unique().shape  # 618 unique patients have received chemo

(618,)

In [9]:
# How many unique patients with cancer have received the chemos above
cancer_patients[cancer_patients['PATIENT'].isin(chemo_pats['PATIENT'])].nunique()

index          1283
START           895
STOP             22
PATIENT         542
ENCOUNTER       930
CODE             11
DESCRIPTION      11
dtype: int64

In [10]:
# Select relevant columns and convert start date to datetime format
chemo_pats = chemo_pats[['START', 'PATIENT', 'DESCRIPTION']].reset_index() #May want to reset index
chemo_pats['START'] = pd.to_datetime(pd.to_datetime(chemo_pats['START']).dt.date)
chemo_pats.dtypes

index                   int64
START          datetime64[ns]
PATIENT                object
DESCRIPTION            object
dtype: object

In [11]:
#Changing NaN values to Current Date--- Can change this to watever date we want to 'Cap' our analysis at.  Should probably be date of death or max date from dataset
cancer_patients['STOP'] = cancer_patients['STOP'].replace(np.nan, '2023-01-01')

In [12]:
#Creating a dataframe with a line item for each month between 1/1/1900 (overkill- I know) & today
r_time = pd.DataFrame(pd.date_range('01-01-1900',datetime.today().strftime("%Y-%m-%d"), 
              freq='MS').strftime("%-d/%-m/%Y").tolist(), columns=['MemMonth']) 

In [13]:
#Converting the MemMonth column to datetime
r_time['MemMonth']= pd.to_datetime(r_time['MemMonth'])

In [14]:
#Converting the START column to datetime
cancer_patients['START'] = pd.to_datetime(cancer_patients['START'])

In [15]:
#Converting the STOP column to datetime
cancer_patients['STOP'] = pd.to_datetime(cancer_patients['STOP'])

In [16]:
#Creating dataframes having equivalent number of rows in to concatenate together, will filter to appropriate date ranges in later step
cancer_patients_repeated = pd.concat([cancer_patients] * len(r_time), ignore_index=True)
r_time_repeated = pd.concat([r_time] * len(cancer_patients), ignore_index=True)

In [17]:
#Effectively, joining our repeated member month table to our repeated cancer patients table 
cancer_patients_repeated['MemMonth'] = r_time_repeated

In [18]:
#Creating First DOM Field for Filter
cancer_patients_repeated['START_FLOOR'] = (cancer_patients_repeated['START'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))

In [19]:
#Filtering down to rows only between start and stop date values
#May want to change to the first date of the month here.  Right now, only capturing MM on the month following our start date
MemMonths = cancer_patients_repeated.query('MemMonth >= START_FLOOR and MemMonth <= STOP').sort_values(by = ['MemMonth', 'PATIENT']).reset_index()

#Dropping unneeded columns
MemMonths.drop(columns=MemMonths.columns[[0, 1, -1]], axis = 1, inplace = True)

In [20]:
#Priviewing MemMonths
MemMonths.head()


Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,MemMonth
0,1938-03-18,2023-01-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-03-01
1,1938-03-18,2023-01-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-04-01
2,1938-03-18,2023-01-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-05-01
3,1938-03-18,2023-01-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-06-01
4,1938-03-18,2023-01-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-07-01


In [21]:
#Size of Dataframe
MemMonths.shape

(540365, 7)

In [41]:
#Pivoting Dataframe to contain cancer type
MemMonthsWide = MemMonths.pivot(index = ['PATIENT', 'START', 'STOP', 'MemMonth'], columns = 'DESCRIPTION')

In [42]:
#Replacing encounter code value with 1, filling NaN w/ 0
MemMonthsWide.iloc[:, 0:-1] = np.where(MemMonthsWide.iloc[:, 0:-1].isnull(), 0, 1)

In [43]:
#Previewing new MemMonthsWide
MemMonthsWide.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,ENCOUNTER,...,CODE,CODE,CODE,CODE,CODE,CODE,CODE,CODE,CODE,CODE
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,DESCRIPTION,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),Non-small cell lung cancer (disorder),Primary malignant neoplasm of colon,Primary small cell malignant neoplasm of lung TNM stage 1 (disorder),Secondary malignant neoplasm of colon,...,Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),Non-small cell lung cancer (disorder),Primary malignant neoplasm of colon,Primary small cell malignant neoplasm of lung TNM stage 1 (disorder),Secondary malignant neoplasm of colon,Suspected lung cancer (situation)
PATIENT,START,STOP,MemMonth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2023-01-01,1987-12-01,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,
01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2023-01-01,1988-01-01,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,
01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2023-01-01,1988-02-01,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,
01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2023-01-01,1988-03-01,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,
01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2023-01-01,1988-04-01,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffbbfa5b-12c0-1084-d65c-bd8a03cf523a,2007-02-05,2023-01-01,2022-09-01,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,
ffbbfa5b-12c0-1084-d65c-bd8a03cf523a,2007-02-05,2023-01-01,2022-10-01,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,
ffbbfa5b-12c0-1084-d65c-bd8a03cf523a,2007-02-05,2023-01-01,2022-11-01,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,
ffbbfa5b-12c0-1084-d65c-bd8a03cf523a,2007-02-05,2023-01-01,2022-12-01,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,
