In [44]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [2]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [3]:
# preprocessed file from Thomas
transfers_events = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/combined_data_export.csv')

  transfers_events = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/combined_data_export.csv')


In [4]:
# bunch of nan rows
transfers_events = transfers_events[~transfers_events['stay_id'].isna() & ~transfers_events['hadm_id'].isna()]


In [10]:
# grab subject IDs
transfers_events = transfers_events.merge(edstays[['stay_id', "subject_id"]], on="stay_id", how="left")

In [11]:
# discharge summaries
discharges = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge.csv.gz")

# ed stays
edstays = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/edstays.csv.gz')

# triage
triage = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/triage.csv.gz')

In [12]:
triage.stay_id.nunique(), triage.subject_id.nunique(), triage.shape

(71994, 48915, (71994, 11))

In [13]:
# ward transfers
transfers = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/transfers.csv.gz')

# higher-level services (ICU, CARD, etc)
services = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/services.csv.gz')

# get patient info
pts = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/patients.csv.gz')

# admission demographics
admissions = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/admissions.csv.gz')

# procedures
procs = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/procedures_icd.csv.gz')
procs_icd = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/d_icd_procedures.csv.gz')

# diagnoses
diags = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/diagnoses_icd.csv.gz')
diags_icd = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/d_icd_diagnoses.csv.gz')

# meds
med_orders = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/emar.csv.gz')


In [14]:
med_orders = med_orders[~med_orders['charttime'].isna() & 
                        ~med_orders['medication'].isna() & 
                        ~med_orders['event_txt'].isna()]

In [15]:
procs = procs.astype({"chartdate":"datetime64[ns]"})
med_orders = med_orders.astype({"charttime":"datetime64[ns]",
                  "scheduletime":"datetime64[ns]",
                  "storetime":"datetime64[ns]",})

discharges = discharges.astype({"charttime":"datetime64[ns]",
                               "storetime":"datetime64[ns]"})

In [16]:
# are there multiple icd code mappings? 
procs_icd.shape, procs_icd.icd_code.nunique()
# looks like only for the two versions of ICD

((85257, 3), 85251)

In [17]:
procs_icd[procs_icd['icd_code'] == "067"]

Unnamed: 0,icd_code,icd_version,long_title
20005,67,9,Excision of thyroglossal duct or tract
20006,67,10,"Lower Veins, Dilation"


In [18]:
# drop any potential repeats
procs_icd = procs_icd.groupby(["icd_code", "icd_version"]).first().reset_index()
diags_icd = diags_icd.groupby(["icd_code", "icd_version"]).first().reset_index()

In [19]:
# grab long_titles for procs/diags
procs = procs.merge(procs_icd, on=["icd_code", "icd_version"], how="left")
diags = diags.merge(diags_icd, on=["icd_code", "icd_version"], how="left")

In [20]:

# why would someone have multiple ED stays for the same admission?
# looks like they came and left the ED multiple times during the course of their hospital course? 
transfers_events[transfers_events['hadm_id'] == 23880210].iloc[0]['careunit']

"['Emergency Department', 'Emergency Department', nan, 'Medicine']"

In [21]:
transfers[transfers['hadm_id'] == 29999828].sort_values("intime")[['intime',"outtime",'careunit']]

Unnamed: 0,intime,outtime,careunit
1242364,2180-10-27 05:24:15,2180-10-27 19:50:31,Discharge Lounge
1242365,2180-10-27 19:50:31,2180-10-28 09:54:07,Med/Surg
1242363,2180-10-28 09:54:07,2180-10-29 16:32:32,Coronary Care Unit (CCU)
1242361,2180-10-29 16:32:32,2180-10-30 17:27:11,Med/Surg
1242362,2180-10-30 17:27:11,,


In [22]:
# transfers_events.stay_id.duplicated().sum()
transfers_events.hadm_id.duplicated().sum()

159

In [27]:
# create initial input text prompt
def get_demos(subject_id):
    # has gender, anchor-age, date-of-death if exists
    return pts[pts['subject_id'] == subject_id].squeeze()
    
def get_transfers(hadm_id):
    return transfers[transfers['hadm_id'] == hadm_id].sort_values("intime").squeeze()
    
def get_procs_within_service(hadm_id, transfer_event):
    adm_procs = procs[procs['hadm_id'] == hadm_id]
    adm_procs = adm_procs.sort_values("seq_num")
    
    adm_procs_in_unit = adm_procs[(adm_procs['chartdate'] > transfer_event['intime'])
                                & (adm_procs['chartdate'] < transfer_event['outtime'])]
    return adm_procs_in_unit
    
def get_procs(hadm_id):
    adm_procs = procs[procs['hadm_id'] == hadm_id]
    return adm_procs.sort_values("seq_num")

def get_diags(hadm_id):
    adm_diags = diags[diags['hadm_id'] == hadm_id]
    return adm_diags.sort_values("seq_num")

def get_med_orders_within_service(hadm_id, transfer_event):
    med_admin = med_orders[(med_orders['hadm_id'] == hadm_id) & (med_orders['event_txt'] == "Administered")]
    med_admin = med_admin.sort_values("emar_seq")
    
    adm_diags_in_unit = med_admin[(med_admin['charttime'] > transfer_event['intime'])
                                & (med_admin['charttime'] < transfer_event['outtime'])]

    adm_diags_in_unit['admin_text'] = adm_diags_in_unit['medication'] + " at " + adm_diags_in_unit['charttime'].dt.strftime('%B %d, %Y, %r')
    
    return adm_diags_in_unit

def get_med_orders(hadm_id):
    med_admin = med_orders[(med_orders['hadm_id'] == hadm_id) & (med_orders['event_txt'] == "Administered")]
    med_admin['admin_text'] = med_admin['medication'] + " at " + med_admin['charttime'].dt.strftime('%B %d, %Y, %r')
    return med_admin

def get_triage_info(stay_id):
    return triage[triage['stay_id'] == stay_id]

In [28]:
def create_pt_prompt(discharge_row):

    demos = get_demos(discharge_row['subject_id'])
    if demos.empty:
        age = r"[UNKNOWN AGE]"
        sex = r"[UNKNOWN SEX]"
    else:
        age = demos['anchor_age']
        sex = demos['gender']
    
    chief_complaints = discharge_row['chiefcomplaint']
    if sex:
        pronoun = ["he","his"] if sex == "M" else ['she',"her"]
    else:
        pronoun = ["they", "their"]

    careunits = ast.literal_eval(discharge_row['careunit'].replace('nan', 'None'))
    careunits = [x for x in careunits if x]

    # transfers with dates
    tranfers = get_transfers(discharge_row['hadm_id'])
    
    procs = get_procs(discharge_row['hadm_id'])
    diags = get_diags(discharge_row['hadm_id'])
    
    med_orders = get_med_orders(discharge_row['hadm_id'])

    prompt = f"___ is a {age} year old {sex} presenting to the ED with {chief_complaints}. Over the course of {pronoun[1]} hospital course, ___ started at {careunits[0]} and then visited {', '.join(careunits[1:])}. Over the course of their hospital stay, {pronoun[0]} was given the following diagnoses: {', '.join(diags['long_title'])} in order of importance to this admission.\n\nThis led to receiving the following procedures: {', '.join(procs['long_title'])} in order of priority.\n\n___ also received the following medications: {', '.join(med_orders['admin_text'])}"
    
    return prompt
    


In [29]:
transfers[transfers['hadm_id'] == transfers['hadm_id'].sample(1).squeeze()].sort_values("intime")

Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime


In [None]:
print(create_pt_prompt(transfers_events.sample(1).squeeze()))

In [34]:
prompts = transfers_events.sample(2).progress_apply(create_pt_prompt, axis=1)

100%|██████████| 2/2 [00:02<00:00,  1.34s/it]


In [35]:
transfers_events['prompts'] = prompts

In [36]:
transfers_events_with_prompts = transfers_events[~transfers_events['prompts'].isna()]

In [37]:
# Output 

# transfers_events_with_prompts.astype({"hadm_id":"int"}).rename({"Unnamed: 0":"idx"}, axis=1)[["idx", 'hadm_id','brief_hospital_course','prompts']].to_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/combined_data_export_with_prompts.csv", index=False)
# transfers_events_with_prompts.astype({"hadm_id":"int"}).rename({"Unnamed: 0":"idx"}, axis=1)[["idx", 'hadm_id','brief_hospital_course','prompts']].to_html("combined_data_export_with_prompts.html", index=False)

In [None]:
from tabulate import tabulate
tabulate(transfers_events_with_prompts.astype({"hadm_id":"int"}).rename({"Unnamed: 0":"idx"}, axis=1)[["idx", 'hadm_id','brief_hospital_course','prompts']], tablefmt="tsv")

# Service-Level Progress Note Generation 

This section generates a pseudo-progress note from service-level information using GPT-3. It uses the raw data, instead of the preprocessed transfers_events data above from Thomas. 


In [40]:
def create_pt_prompt_per_service(discharge_row):

    demos = get_demos(discharge_row['subject_id'])
    if demos.empty:
        age = r"[UNKNOWN AGE]"
        sex = r"[UNKNOWN SEX]"
    else:
        age = demos['anchor_age']
        sex = demos['gender']

    pt_edstays = edstays[edstays['hadm_id'] == discharge_row['hadm_id']]

    ccs = []
    for stay_id in pt_edstays['stay_id'].tolist():
        triage_info = get_triage_info(stay_id)
        ccs.append(triage_info['chiefcomplaint'].squeeze())
        
    chief_complaints = ", ".join(ccs)

    if sex:
        pronoun = ["he","his"] if sex == "M" else ['she',"her"]
    else:
        pronoun = ["they", "their"]

    # transfers with dates
    transfers = get_transfers(discharge_row['hadm_id'])

    # get stay admission diagnoses
    diags = get_diags(discharge_row['hadm_id'])

    init_prompt = f"___ is a {age} year old {sex} that initially presented to the ED with {chief_complaints}. By the end of {pronoun[1]} hospital stay, {pronoun[0]} was given the following diagnoses: {', '.join(diags['long_title'])} in order of importance to this admission. "
    transfer_service_prompts = []

    for index, row in transfers.iterrows():
        if row['eventtype'] == "discharge":
             transfer_service_prompts.append("")
        else:
            # TODO: add from here. 
            within_service_prompt = f"The patient was transferred and stayed in the {row['careunit']} ward between {row['intime']} and {row['outtime']}. ___ received "
            
            procs = get_procs_within_service(discharge_row['hadm_id'], row)
            med_orders = get_med_orders_within_service(discharge_row['hadm_id'], row)
    
            within_service_prompt = within_service_prompt + f"the following procedures (ordered by priority): {', '.join(procs['long_title'].tolist())}. "
            within_service_prompt = within_service_prompt + f"{pronoun[0]} also received the following medications (ordered chronologically) during the service: {', '.join(med_orders['medication'].tolist())}. "
            
            full_prompt = init_prompt + within_service_prompt + f"Given this information, please generate a progress note for this patient for their care during this part of their hospital course staying in the {row['careunit']} ward."
            transfer_service_prompts.append(full_prompt)

    transfers['service_prompts'] = transfer_service_prompts
    
    return transfers
    


In [41]:
discharges.sample(1)

Unnamed: 0,note_id,subject_id,hadm_id,note_type,note_seq,charttime,storetime,text
71703,19980545-DS-14,19980545,21585596,DS,14,2179-01-04,2179-01-08 17:24:00,\nName: ___ Unit No: ___\...


In [42]:
service_prompts = create_pt_prompt_per_service(discharges.sample(1).squeeze())

In [45]:


openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "decile-gpt-35-turbo-16k"



message_text = [{"role":"system","content":f"You are a physician that is reviewing a patient's medical record during their stay in your ward: {service_prompts.iloc[1]['careunit']} and writing progress notes based on this information"},]

deprescribe_prompt = {"role":"user",
                 "content":service_prompts['service_prompts'].iloc[1]}


message_text.append(deprescribe_prompt)
print(f"Deprescribe Prompt: {message_text}")

completion = openai.ChatCompletion.create(
  engine=engine,
  messages = message_text,
)


Deprescribe Prompt: [{'role': 'system', 'content': "You are a physician that is reviewing a patient's medical record during their stay in your ward: Med/Surg/GYN and writing progress notes based on this information"}, {'role': 'user', 'content': '___ is a 29 year old M that initially presented to the ED with ABD DISCOMFORT. By the end of his hospital stay, he was given the following diagnoses: Malignant neoplasm of rectosigmoid junction, Nutritional marasmus, Other specified intestinal obstruction, Hydronephrosis, Body Mass Index less than 19, adult, Personal history of malignant neoplasm of large intestine, Stricture or kinking of ureter, Iron deficiency anemia, unspecified, Essential thrombocythemia in order of importance to this admission. The patient was transferred and stayed in the Med/Surg/GYN ward between 2185-01-20 00:04:00 and 2185-01-20 16:49:08. ___ received the following procedures (ordered by priority): . he also received the following medications (ordered chronologically

In [46]:
print(completion['choices'][0]['message']['content'])

Patient Name: [Patient's Name]
Date: [Current Date]

Subjective:
The patient, a 29-year-old male, was transferred to the Med/Surg/GYN ward on [Transfer Date]. He presented initially to the ED with complaints of abdominal discomfort. Throughout his hospital stay, the patient has been diagnosed with multiple medical conditions, including a malignant neoplasm of the rectosigmoid junction, nutritional marasmus, other specified intestinal obstruction, hydronephrosis, a body mass index less than 19, a personal history of malignant neoplasm of the large intestine, a stricture or kinking of the ureter, iron deficiency anemia, unspecified, and essential thrombocythemia.

Objective:
During the patient's hospitalization in the Med/Surg/GYN ward from [Start Date] to [End Date], the following procedures were performed: [List procedures performed in order of priority].

The patient was started on several medications during their stay. [List medications prescribed in chronological order].

Assessment