In this file, patient conditions (diseases) are queried to find all patients with a history of cancer who also have cardiac issues (not assessed chronoclogically yet). I also compiled a list of chemotherapies from the medications.csv file and looked to see how many patients have been exposed to chemo.

In [1]:
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [2]:
os.chdir('C:/Users/Student/Documents/EHR---Team-6-Project')

In [3]:
# Load the conditions.csv df into a list and determine unique values
conditions = pd.read_csv('conditions.csv')
diseases = conditions.DESCRIPTION.unique()
# diseases

In [4]:
# Create a list of cancer conditions
cancer_list = ['Neoplasm of prostate',
       'Carcinoma in situ of prostate (disorder)',
       'Malignant tumor of colon', 
       'Primary malignant neoplasm of colon',
       'Suspected lung cancer (situation)',
       'Non-small cell lung cancer (disorder)',
       'Non-small cell carcinoma of lung  TNM stage 1 (disorder)',
       'Malignant neoplasm of breast (disorder)',
       'Metastasis from malignant tumor of prostate (disorder)',
       'Overlapping malignant neoplasm of colon'
       'Small cell carcinoma of lung (disorder)',
       'Primary small cell malignant neoplasm of lung  TNM stage 1 (disorder)',
       'Secondary malignant neoplasm of colon']

# Filiter the conditions df to get all cardiac patients
cancer_patients = conditions[conditions['DESCRIPTION'].isin(cancer_list)].reset_index()
cancer_patients.columns


Index(['index', 'START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE',
       'DESCRIPTION'],
      dtype='object')

In [5]:
# Create a list of cardiac conditions
cardiac_list = ['Chronic congestive heart failure (disorder)', 
       'Cardiac Arrest', 'History of cardiac arrest (situation)',
       'Atrial Fibrillation', 
       'Coronary Heart Disease',
       'Myocardial Infarction',
       'History of myocardial infarction (situation)']

# Filiter the conditions df to get all cardiac patients
cardiac_patients = conditions[conditions['DESCRIPTION'].isin(cardiac_list)].reset_index()
cardiac_patients.columns

Index(['index', 'START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE',
       'DESCRIPTION'],
      dtype='object')

In [6]:
# Create a list of cancer patients that have the cardiac outcomes. 
combo_patients = cancer_patients[cancer_patients['PATIENT'].isin(cardiac_patients['PATIENT'])].reset_index()
unique_combo_patients =  combo_patients['PATIENT'].unique() # 297 unique patients with cancer and cardiac disease

In [7]:
# Read in medication data and review unique meds
medications = pd.read_csv('medications.csv')
meds = medications.DESCRIPTION.unique()

# Compile a list of chemo meds from the medications data
chemo = ['1 ML DOCEtaxel 20 MG/ML Injection', '0.25 ML Leuprolide Acetate 30 MG/ML Prefilled Syringe', 
         '10 ML oxaliplatin 5 MG/ML Injection', 'Cisplatin 50 MG Injection', 'PACLitaxel 100 MG Injection', 
         '100 ML Epirubicin Hydrochloride 2 MG/ML Injection', 'Tamoxifen 10 MG Oral Tablet', 'palbociclib 100 MG Oral Capsule', 
         'Paclitaxel 100 MG Injection', '10 ML Doxorubicin Hydrochloride 2 MG/ML Injection',
         'Etoposide 100 MG Injection', 'Methotrexate 2.5 MG Oral Tablet', 'ribociclib 200 MG Oral Tablet', 
         'neratinib 40 MG Oral Tablet', 'exemestane 25 MG Oral Tablet']


In [8]:
# Search for patient who have received chemo in the list above
chemo_pats = medications[medications['DESCRIPTION'].isin(chemo)].reset_index()
chemo_pats.PATIENT.unique().shape  # 618 unique patients have received chemo

(618,)

In [9]:
# How many unique patients with cancer have received the chemos above
cancer_patients[cancer_patients['PATIENT'].isin(chemo_pats['PATIENT'])].nunique()

index          1283
START           895
STOP             22
PATIENT         542
ENCOUNTER       930
CODE             11
DESCRIPTION      11
dtype: int64

In [10]:
# Select relevant columns and convert start date to datetime format
chemo_pats = chemo_pats[['START', 'PATIENT', 'DESCRIPTION']].reset_index() #May want to reset index
chemo_pats['START'] = pd.to_datetime(pd.to_datetime(chemo_pats['START']).dt.date)
chemo_pats.dtypes

index                   int64
START          datetime64[ns]
PATIENT                object
DESCRIPTION            object
dtype: object

In [11]:
#Changing NaN values to Current Date--- Can change this to watever date we want to 'Cap' our analysis at.  Should probably be date of death or max date from dataset
cancer_patients['STOP'] = cancer_patients['STOP'].replace(np.nan, '2021-08-01')

In [12]:
#Creating a dataframe with a line item for each month between 1/1/1900 (overkill- I know) & today
r_time = pd.DataFrame(pd.date_range('01-01-1900',datetime.today().strftime("%Y-%m-%d"), 
              freq='MS').strftime("%-d/%-m/%Y").tolist(), columns=['MemMonth']) 

In [13]:
#Converting the MemMonth column to datetime
r_time['MemMonth']= pd.to_datetime(r_time['MemMonth'])

In [14]:
#Converting the START column to datetime
cancer_patients['START'] = pd.to_datetime(cancer_patients['START'])

In [15]:
#Converting the STOP column to datetime
cancer_patients['STOP'] = pd.to_datetime(cancer_patients['STOP'])

In [16]:
#Creating dataframes having equivalent number of rows in order to join together, will filter to appropriate date ranges in later step
cancer_patients_repeated = pd.concat([cancer_patients] * len(r_time), ignore_index=True)
r_time_repeated = pd.concat([r_time] * len(cancer_patients), ignore_index=True)

In [17]:
#Effectively, joining our repeated member month table to our repeated cancer patients table 
cancer_patients_repeated['MemMonth'] = r_time_repeated

In [18]:
#Creating First DOM Field for Filter
cancer_patients_repeated['START_FLOOR'] = (cancer_patients_repeated['START'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))

In [19]:
#Filtering down to rows only between start and stop date values
#May want to change to the first date of the month here.  Right now, only capturing MM on the month following our start date
MemMonths = cancer_patients_repeated.query('MemMonth >= START_FLOOR and MemMonth <= STOP').sort_values(by = ['MemMonth', 'PATIENT']).reset_index()

#Dropping unneeded columns
MemMonths.drop(columns=MemMonths.columns[[0, 1, -1]], axis = 1, inplace = True)

In [20]:
#Previewing MemMonths
MemMonths.head()


Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,MemMonth
0,1938-03-18,2021-08-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-03-01
1,1938-03-18,2021-08-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-04-01
2,1938-03-18,2021-08-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-05-01
3,1938-03-18,2021-08-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-06-01
4,1938-03-18,2021-08-01,2e598f38-54c2-8115-6061-48e7612e4dc0,863baa02-88e9-9b89-afa5-4a380e39e916,254837009,Malignant neoplasm of breast (disorder),1938-07-01


In [21]:
#Size of Dataframe
MemMonths.shape

(516395, 7)

In [22]:
#Pivoting Dataframe to contain cancer type
MemMonthsWide = MemMonths.pivot(index = ['PATIENT', 'START', 'STOP', 'MemMonth'], columns = 'DESCRIPTION', values = 'DESCRIPTION')

In [23]:
#Replacing encounter code value with 1, filling NaN w/ 0
MemMonthsWide.iloc[:, 0:] = np.where(MemMonthsWide.iloc[:, 0:].isnull(), 0, 1)

In [24]:
#Resetting index
MemMonthsWide.reset_index(inplace = True)
#Previewing new MemMonthsWide
#MemMonthsWide.head()

In [25]:
#Adding Patient Data
patients = pd.read_csv('patients.csv')

In [26]:
patients.drop(['SSN', 'DRIVERS', 'PASSPORT', 'PREFIX',
       'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP',
       'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE'], axis = 1, inplace = True)

In [27]:
patients['BIRTHDATE']=pd.to_datetime(patients['BIRTHDATE'])
patients['DEATHDATE']=pd.to_datetime(patients['DEATHDATE'])

In [28]:
# Combine 'native' and 'other' into one group as there are few values
patients['RACE'].replace(to_replace='native', value='other', inplace=True)
patients.RACE.value_counts(dropna=False)

white    2978
black     316
asian     233
other      12
Name: RACE, dtype: int64

In [29]:
# Convert Gender to boolean/integer
patients['ISMALE'] = patients['GENDER']=='M'

# Convert Martial to boolean/integer
patients['ISMARRIED'] = patients['MARITAL']=='M'

# Convert Ethnicity to boolean/integer
patients['ISHISPANIC'] = patients['ETHNICITY']=='hispanic'

patients = patients.drop(['MARITAL', 'ETHNICITY', 'GENDER'], axis = 1)

In [30]:
# Convert Gender to integer
patients['ISMALE'] = patients['ISMALE'].astype(int)

# Convert Marital to integer
patients['ISMARRIED'] = patients['ISMARRIED'].astype(int)

# Convert Ethnicity to integer
patients['ISHISPANIC'] = patients['ISHISPANIC'].astype(int)

In [31]:
patients.head()

Unnamed: 0,Id,BIRTHDATE,DEATHDATE,RACE,ISMALE,ISMARRIED,ISHISPANIC
0,8b0484cd-3dbd-8b8d-1b72-a32f74a5a846,1957-04-07,NaT,white,1,1,0
1,b8eb8d31-1031-fb5b-e207-b9815f80744c,1975-08-16,NaT,asian,1,0,0
2,ce9bd436-6b59-0452-86a4-61f3642736bc,1945-05-11,2015-04-09,white,1,1,0
3,6fc3e360-ae68-c411-e091-4734df51eb18,1947-12-30,NaT,black,0,1,0
4,ce4ce4d8-d4e2-aca2-5a92-8ce703c5077a,1993-02-05,NaT,white,0,1,0


In [32]:
#Creating a pivoted view of patients for joining to MemMonths
patients_wide = patients.pivot(index = ['Id', 'BIRTHDATE', 'DEATHDATE', 'ISMALE', 'ISMARRIED', 'ISHISPANIC'], columns = 'RACE', values = 'RACE')

In [33]:
#Replacing RACE value with 1, filling NaN w/ 0
patients_wide.iloc[:, 0:] = np.where(patients_wide.iloc[:, 0:].isnull(), 0, 1)

In [34]:
#Resetting Index
patients_wide.reset_index(inplace = True)
#patients_wide.head()

In [35]:
#Pivoting Patient Data
MemMonthsWider = MemMonthsWide.merge(patients_wide, left_on = 'PATIENT', right_on = 'Id', how = 'left')

In [36]:
MemMonthsWider.head()

Unnamed: 0,PATIENT,START,STOP,MemMonth,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),...,Id,BIRTHDATE,DEATHDATE,ISMALE,ISMARRIED,ISHISPANIC,asian,black,other,white
0,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1987-12-01,1,0,0,0,1,0,...,01541ad3-5323-cb64-96a7-c2ef42941253,1918-09-05,1996-11-07,1,0,0,0,0,0,1
1,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-01-01,1,0,0,0,1,0,...,01541ad3-5323-cb64-96a7-c2ef42941253,1918-09-05,1996-11-07,1,0,0,0,0,0,1
2,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-02-01,1,0,0,0,1,0,...,01541ad3-5323-cb64-96a7-c2ef42941253,1918-09-05,1996-11-07,1,0,0,0,0,0,1
3,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-03-01,1,0,0,0,1,0,...,01541ad3-5323-cb64-96a7-c2ef42941253,1918-09-05,1996-11-07,1,0,0,0,0,0,1
4,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-04-01,1,0,0,0,1,0,...,01541ad3-5323-cb64-96a7-c2ef42941253,1918-09-05,1996-11-07,1,0,0,0,0,0,1


In [37]:
#Converting the START column to datetime
cardiac_patients['START'] = pd.to_datetime(cardiac_patients['START'])

In [38]:
#Creating MemMonth field that is first day of the Start date's month
cardiac_patients['MemMonth'] = (cardiac_patients['START'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))

In [39]:
#Pivoting Dataframe to contain cardiac event
cardiac_patients_wide = cardiac_patients.pivot(index = ['PATIENT', 'START', 'MemMonth'], columns = 'DESCRIPTION', values = 'DESCRIPTION')

In [40]:
#Resetting index
cardiac_patients_wide.reset_index(inplace = True)
#Previewing new cardiac_patients_wide
#cardiac_patients_wide.head()

In [41]:
#Joining Cardiac and MemMonths
MemMonthsWidest = MemMonthsWider.merge(cardiac_patients_wide, on = ['PATIENT', 'MemMonth'], how = 'left')

In [42]:
#Replacing description value with 1, filling NaN w/ 0
MemMonthsWidest.iloc[:, -7:] = np.where(MemMonthsWidest.iloc[:, -7:].isnull(), 0, 1)

In [43]:
MemMonthsWidest.head()

Unnamed: 0,PATIENT,START_x,STOP,MemMonth,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),...,other,white,START_y,Atrial Fibrillation,Cardiac Arrest,Chronic congestive heart failure (disorder),Coronary Heart Disease,History of cardiac arrest (situation),History of myocardial infarction (situation),Myocardial Infarction
0,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1987-12-01,1,0,0,0,1,0,...,0,1,NaT,0,0,0,0,0,0,0
1,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-01-01,1,0,0,0,1,0,...,0,1,NaT,0,0,0,0,0,0,0
2,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-02-01,1,0,0,0,1,0,...,0,1,NaT,0,0,0,0,0,0,0
3,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-03-01,1,0,0,0,1,0,...,0,1,NaT,0,0,0,0,0,0,0
4,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-04-01,1,0,0,0,1,0,...,0,1,NaT,0,0,0,0,0,0,0


In [44]:
#Creating Age Field
MemMonthsWidest['Age'] = MemMonthsWidest.apply(lambda x: relativedelta(x['MemMonth'], x['BIRTHDATE']).years, axis=1)

In [45]:
#Creating Deceased Flag
MemMonthsWidest['DeceasedFlag'] = (MemMonthsWidest['MemMonth'] == (MemMonthsWidest['DEATHDATE'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))).astype(int)


In [46]:
#Dropping rows after member had passed
MemMonthsWidest = MemMonthsWidest.query('MemMonth <= DEATHDATE').reset_index()

In [47]:
#MemMonthsWidest.to_csv(r'MemMonthsWidest.csv') 

In [48]:
#Dropping Unneeded Fields
MemMonthsWidest.drop(['Id', 'BIRTHDATE', 'DEATHDATE', 'START_y', 'index'], axis = 1, inplace = True)

In [49]:
#Replacing null values with 0s
chemo_pats.iloc[:, -1] = np.where(chemo_pats.iloc[:, -1].isnull(), 0, 1)

In [50]:
#Adding MM field to chemo_pats
chemo_pats['MemMonth'] = (chemo_pats['START'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))

In [51]:
#Dropping Unneeded fields
chemo_pats.drop(['START', 'index'], axis = 1, inplace = True)

In [52]:
#Dropping duplicates
chemo_pats = chemo_pats.drop_duplicates()

In [53]:
#Renaming to chemo column
chemo_pats.rename(columns = {'DESCRIPTION':'CHEMO'}, inplace = True)

In [54]:
chemo_pats.head()

Unnamed: 0,PATIENT,CHEMO,MemMonth
0,a4840112-31e6-b8a8-296f-7a4b89bee4f7,1,1977-07-01
2,40c7c5d7-e21d-0aec-3023-bf613f37a5f1,1,1995-06-01
3,0c187b1f-27d8-a277-54ba-a2b7ee0db45e,1,1978-04-01
5,a210e7ea-ae55-257f-6476-ca2e1fcfcb21,1,1981-09-01
7,a210e7ea-ae55-257f-6476-ca2e1fcfcb21,1,1983-05-01


In [55]:
#Joining Chemo Patients to Mem Month Table
MemMonthsMoreWidest = MemMonthsWidest.merge(chemo_pats, on = ['PATIENT', 'MemMonth'], how = 'left')

In [56]:
#Changeing NaNs to 0s
MemMonthsMoreWidest.iloc[:, -1] = np.where(MemMonthsMoreWidest.iloc[:, -1].isnull(), 0, 1)

In [57]:
MemMonthsMoreWidest.head()

Unnamed: 0,PATIENT,START_x,STOP,MemMonth,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),...,Atrial Fibrillation,Cardiac Arrest,Chronic congestive heart failure (disorder),Coronary Heart Disease,History of cardiac arrest (situation),History of myocardial infarction (situation),Myocardial Infarction,Age,DeceasedFlag,CHEMO
0,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1987-12-01,1,0,0,0,1,0,...,0,0,0,0,0,0,0,69,0,1
1,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-01-01,1,0,0,0,1,0,...,0,0,0,0,0,0,0,69,0,0
2,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-02-01,1,0,0,0,1,0,...,0,0,0,0,0,0,0,69,0,0
3,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-03-01,1,0,0,0,1,0,...,0,0,0,0,0,0,0,69,0,0
4,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-04-01,1,0,0,0,1,0,...,0,0,0,0,0,0,0,69,0,0


In [58]:
#Load Encounter Data
encounters = pd.read_csv('encounters.csv')

In [59]:
encounters.head()

Unnamed: 0,Id,START,STOP,PATIENT,ORGANIZATION,PROVIDER,PAYER,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION
0,03bc904a-c655-0580-a620-264b065c01e6,1991-06-23T08:57:56Z,1991-06-23T09:12:56Z,8b0484cd-3dbd-8b8d-1b72-a32f74a5a846,7185ef1a-1b68-3723-8804-32dacb99e678,5ab0650d-1376-3bde-83c3-7f517505bdb7,d47b3510-2895-3b70-9897-342d681c769d,wellness,162673000,General examination of patient (procedure),129.16,129.16,49.16,,
1,7cfc42ac-357a-c88e-1275-9d63f900b7d8,2001-04-22T08:57:56Z,2001-04-22T09:12:56Z,8b0484cd-3dbd-8b8d-1b72-a32f74a5a846,7185ef1a-1b68-3723-8804-32dacb99e678,5ab0650d-1376-3bde-83c3-7f517505bdb7,d47b3510-2895-3b70-9897-342d681c769d,wellness,162673000,General examination of patient (procedure),129.16,129.16,49.16,,
2,42dbe56f-5b30-38b3-5634-a9b1a0206e44,1993-10-09T13:11:33Z,1993-10-09T13:26:33Z,b8eb8d31-1031-fb5b-e207-b9815f80744c,87f6941d-90f6-3c1d-b443-4038458cf8d7,5db62284-9e52-3c8e-bde0-53d81bd39963,b1c428d6-4f07-31e0-90f0-68ffa6ff8c76,wellness,162673000,General examination of patient (procedure),129.16,129.16,0.0,,
3,d3cbf195-f49a-24f5-fe79-c6dc21fb92da,2003-04-27T08:57:56Z,2003-04-27T09:12:56Z,8b0484cd-3dbd-8b8d-1b72-a32f74a5a846,7185ef1a-1b68-3723-8804-32dacb99e678,5ab0650d-1376-3bde-83c3-7f517505bdb7,d47b3510-2895-3b70-9897-342d681c769d,wellness,162673000,General examination of patient (procedure),129.16,129.16,49.16,,
4,155aa73b-46da-5808-c218-80a5ed671009,2003-04-27T08:57:56Z,2003-04-27T09:12:56Z,8b0484cd-3dbd-8b8d-1b72-a32f74a5a846,49318f80-bd8b-3fc7-a096-ac43088b0c12,6649b133-96f5-3920-90c7-2e61723d7dc8,d47b3510-2895-3b70-9897-342d681c769d,ambulatory,185347001,Encounter for problem,77.49,77.49,0.0,271737000.0,Anemia (disorder)


In [60]:
#Converting the START column to datetime
encounters['START'] = pd.to_datetime(encounters['START'])

In [61]:
#Assign MM
encounters['MemMonth'] = (encounters['START'].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))

In [62]:
#Converting the MemMonth column to datetime w/o UTC
encounters['MemMonth'] = pd.to_datetime(encounters['MemMonth']).dt.tz_localize(None)

In [63]:
#Calculate Patient Spend
PatCost = encounters.groupby(['PATIENT', 'MemMonth', 'ENCOUNTERCLASS'])['TOTAL_CLAIM_COST'].sum().reset_index()

In [64]:
#Pivot Patient Spend
PatCostWide = PatCost.pivot(index = ['PATIENT', 'MemMonth'], columns = 'ENCOUNTERCLASS', values = 'TOTAL_CLAIM_COST').reset_index()

In [65]:
#Converting the MemMonth column to datetime w/o UTC
#PatCostWide['MemMonth'] = pd.to_datetime(PatCostWide['MemMonth']).dt.tz_localize(None)

In [66]:
#Merging into one dataframe
MemMonthsMostWidest = MemMonthsMoreWidest.merge(PatCostWide, on = ['PATIENT', 'MemMonth'], how = 'left')


In [67]:
#Filling NaN values with 0s
MemMonthsMostWidest.fillna(0, inplace = True)

In [68]:
MemMonthsMostWidest.head()

Unnamed: 0,PATIENT,START_x,STOP,MemMonth,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),...,Myocardial Infarction,Age,DeceasedFlag,CHEMO,ambulatory,emergency,inpatient,outpatient,urgentcare,wellness
0,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1987-12-01,1,0,0,0,1,0,...,0,69,0,1,154.98,0.0,0.0,0.0,0.0,129.16
1,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-01-01,1,0,0,0,1,0,...,0,69,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-02-01,1,0,0,0,1,0,...,0,69,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-03-01,1,0,0,0,1,0,...,0,69,0,0,77.49,0.0,0.0,0.0,0.0,0.0
4,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-04-01,1,0,0,0,1,0,...,0,69,0,0,77.49,0.0,0.0,0.0,0.0,0.0


In [69]:
#Count of ER_Visits
ER_Encounters = encounters.groupby(['PATIENT','MemMonth'])['ENCOUNTERCLASS'].apply(lambda x: ((x=='emergency')|(x=='urgentcare')).sum()).reset_index(name='ER_Visits')

In [70]:
#Count of Inpatient Admits
Inpt_Encounters = encounters.groupby(['PATIENT','MemMonth'])['ENCOUNTERCLASS'].apply(lambda x: (x=='inpatient').sum()).reset_index(name='Inpt_Admits')

In [71]:
Inpt_Encounters.head()

Unnamed: 0,PATIENT,MemMonth,Inpt_Admits
0,0020dcbe-9f8d-920d-c008-d68debcef322,1929-11-01,0
1,0020dcbe-9f8d-920d-c008-d68debcef322,1931-05-01,0
2,0020dcbe-9f8d-920d-c008-d68debcef322,1931-11-01,0
3,0020dcbe-9f8d-920d-c008-d68debcef322,1932-05-01,0
4,0020dcbe-9f8d-920d-c008-d68debcef322,1932-10-01,0


In [72]:
#Joining ER and Admits to MM Table
MemMonthsMostWidester = MemMonthsMostWidest.merge(ER_Encounters, on = ['PATIENT','MemMonth'], how = 'left') \
.merge(Inpt_Encounters, on = ['PATIENT','MemMonth'], how = 'left')

In [73]:
#Filling NaN Values w/ 0s
MemMonthsMostWidester.fillna(0, inplace = True)

In [74]:
MemMonthsMostWidester.head()

Unnamed: 0,PATIENT,START_x,STOP,MemMonth,Carcinoma in situ of prostate (disorder),Malignant neoplasm of breast (disorder),Malignant tumor of colon,Metastasis from malignant tumor of prostate (disorder),Neoplasm of prostate,Non-small cell carcinoma of lung TNM stage 1 (disorder),...,DeceasedFlag,CHEMO,ambulatory,emergency,inpatient,outpatient,urgentcare,wellness,ER_Visits,Inpt_Admits
0,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1987-12-01,1,0,0,0,1,0,...,0,1,154.98,0.0,0.0,0.0,0.0,129.16,0.0,0.0
1,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-01-01,1,0,0,0,1,0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-02-01,1,0,0,0,1,0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-03-01,1,0,0,0,1,0,...,0,0,77.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,01541ad3-5323-cb64-96a7-c2ef42941253,1987-12-18,2021-08-01,1988-04-01,1,0,0,0,1,0,...,0,0,77.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [75]:
MemMonthsMostWidester.shape

(73132, 40)