In [3]:
N = 2
output = f"sample_output{N}.csv"
output2 = f"eval{N}.html"
output3 = f"scores{N}.csv"


# Brief Hospital Course Generation

This notebook performs step 2 of the Brief Hospital Course pipeline, in which we generate a brief hospital course from a set of service-level SOAP notes passed into a GPT-3.5 model. 

In [4]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [5]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

## Read in GPT-Generated SOAP Notes

In [6]:
soap_notes = pd.read_csv(output)

## Read in Radiology Reports

In [7]:
radiology = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/radiology.csv.gz")

## Read in Encounter-Level Structured Data

In [8]:
# discharge summaries
discharges = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge.csv.gz")

# ed stays
edstays = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/edstays.csv.gz')

# triage
triage = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/triage.csv.gz')

# ward transfers
transfers = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/transfers.csv.gz')

# higher-level services (ICU, CARD, etc)
services = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/services.csv.gz')

# get patient info
pts = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/patients.csv.gz')

# admission demographics
admissions = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/admissions.csv.gz')

In [9]:

# diagnoses
diags = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/diagnoses_icd.csv.gz')
diags_icd = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/d_icd_diagnoses.csv.gz')


In [10]:
# drop any potential repeats
diags_icd = diags_icd.groupby(["icd_code", "icd_version"]).first().reset_index()

# grab long_titles for procs/diags

diags = diags.merge(diags_icd, on=["icd_code", "icd_version"], how="left")

### Clean up/type cast data

In [11]:
discharges = discharges.astype({"charttime":"datetime64[ns]",
                               "storetime":"datetime64[ns]"})

## Generate SOAP Note List Prompt

In [15]:
soap_notes

Unnamed: 0.1,Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime,service_prompts,gpt_SOAP_note
0,1876400,19921471,26949917.0,37663439,ED,Emergency Department,2153-09-18 21:58:00,2153-09-19 03:32:00,___ is a 62 year old M that initially presente...,"S: The patient, a 62-year-old male, presented ..."
1,1876399,19921471,26949917.0,31958057,admit,Medicine,2153-09-19 03:32:00,2153-09-19 20:39:55,___ is a 62 year old M that initially presente...,"Subjective:\nMr. ___, a 62-year-old male, was ..."
2,1876402,19921471,26949917.0,38441125,transfer,Medicine,2153-09-19 20:39:55,2153-09-20 13:05:42,___ is a 62 year old M that initially presente...,"Subjective:\nThe patient, ___ presented to the..."


In [16]:
discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()

note_id                                          19921471-DS-40
subject_id                                             19921471
hadm_id                                                26949917
note_type                                                    DS
note_seq                                                     40
charttime                                   2153-09-20 00:00:00
storetime                                   2153-09-20 13:16:00
text           \nName:  ___                   Unit No:   ___...
Name: 71296, dtype: object

In [None]:
def create_pt_prompt(discharge_row, shots=None):
    '''Loop over the discharge instructions file in the challenge data. We include any additional examples we might want to include using the `shots` variable. 
    '''

    pt_edstays = edstays[edstays['hadm_id'] == discharge_row['hadm_id']]

    demos = get_demos(discharge_row['subject_id'])
    if demos.empty:
        age = r"[UNKNOWN AGE]"
        sex = r"[UNKNOWN SEX]"
    else:
        age = demos['anchor_age']
        sex = demos['gender']

    ccs = []
    for stay_id in pt_edstays['stay_id'].tolist():
        # TODO: Drop in Triage Vitals/Pain Scale, etc. Currently only using CCs
        triage_info = get_triage_info(stay_id)
        ccs.append(triage_info['chiefcomplaint'].squeeze())
        
    chief_complaints = ", ".join(ccs)    
    
    if sex:
        pronoun = ["he","his"] if sex == "M" else ['she',"her"]
    else:
        pronoun = ["they", "their"]

    transfers = get_transfers(discharge_row['hadm_id'])
    transfers = transfers[~transfers['careunit'].isna()]
    careunits = transfers['careunit'].tolist()
    
    # transfers with dates
    tranfers = get_transfers(discharge_row['hadm_id'])
    diags = get_diags(discharge_row['hadm_id'])

    # get GPT-created SOAP Notes
    soap_notes = get_soap_notes(discharge_row['hadm_id'])
    # get radiology reports
    radiology_reps = get_radiology_reports(discharge_row['hadm_id'])

    if shots:
        prompt = f"___ is a {age} year old {sex} presenting to the ED with {chief_complaints}. Over the course of {pronoun[1]} hospital course, ___ started at {careunits[0]} and then visited {', '.join(careunits[1:])}. Over the course of their hospital stay, {pronoun[0]} was given the following diagnoses: {', '.join(diags['long_title'])} in order of importance to this admission.\n----------------------------------------------------The patient received the following radiology consult reports:{'----------------'.join(radiology_reps['text'].tolist())}.\n----------------------------------------------------Please use the following SOAP notes from the patient's complete hospital course to create a brief hospital course summary. Break down the hospital course summary by condition starting with a #: {'----------------'.join(soap_notes['gpt_SOAP_note'].tolist())}"
        
    else:
        # only support one shot right now
        prompt = f"___ is a {age} year old {sex} presenting to the ED with {chief_complaints}. Over the course of {pronoun[1]} hospital course, ___ started at {careunits[0]} and then visited {', '.join(careunits[1:])}. Over the course of their hospital stay, {pronoun[0]} was given the following diagnoses: {', '.join(diags['long_title'])} in order of importance to this admission.\n----------------------------------------------------The patient received the following radiology consult reports:{'----------------'.join(radiology_reps['text'].tolist())}.\n----------------------------------------------------Please use the following SOAP notes from the patient's complete hospital course to create a brief hospital course summary. Break down the hospital course summary by condition starting with a #: {'----------------'.join(soap_notes['gpt_SOAP_note'].tolist())} Use the following brief hospital course as an example: {shots[0]}"
    
    return prompt
    

In [17]:
bhc_prompt = create_pt_prompt(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze())

## Create SOAP notes from GPT API (0-shot)

In [18]:
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
# engine = "decile-gpt-35-turbo-16k"
engine = "decile-gpt-4-128K"


In [19]:
message_text = [{"role":"system","content":f"You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information. Begin with a brief overview of the patient's demographics, ED chief complaint, and past medical history. Then, write your hospital course summary by condition starting with a #: "},]

gpt_bhc_prompt = {"role":"user",
                 "content":bhc_prompt}

message_text.append(gpt_bhc_prompt)

print(f"Brief Hospital Course Prompt: {message_text}")

Brief Hospital Course Prompt: [{'role': 'system', 'content': "You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information. Begin with a brief overview of the patient's demographics, ED chief complaint, and past medical history. Then, write your hospital course summary by condition starting with a #: "}, {'role': 'user', 'content': "___ is a 62 year old M presenting to the ED with R Flank pain. Over the course of his hospital course, ___ started at Emergency Department and then visited Medicine, Medicine. Over the course of their hospital stay, he was given the following diagnoses: Infection and inflammatory reaction due to other urinary catheter, initial encounter, Sepsis, unspecified organism, Secondary malignant neoplasm of bladder, Type 2 diabetes mellitus without complications, Chronic obstructive pulmonary disease, 

In [20]:
completion = openai.ChatCompletion.create(
  engine=engine,
  messages = message_text,
)

In [21]:
zero_shot_output = completion['choices'][0]['message']['content']; print(zero_shot_output)

Patient Summary:
Mr. [Name], a 62-year-old male, initially presented to the Emergency Department (ED) with chief complaints of right flank pain. He has a complex medical history, including past hospitalizations and treatments for numerous conditions.

Hospital Course by Condition:

# Infection and inflammatory reaction due to urinary catheter, initial encounter
The patient presented with an infection related to urinary catheter use. He was treated with appropriate antibiotics, and the urinary catheter was managed as per the infection control protocol.

# Sepsis, unspecified organism
The management in the ED and subsequent inpatient stay focused on stabilizing the patient's septic condition with appropriate intravenous fluids, antibiotics, and close monitoring.

# Secondary malignant neoplasm of bladder
Oncology consultation was sought, and further investigations and management plans for the bladder malignancy were scheduled post-discharge.

# Type 2 diabetes mellitus without complicati

## Create SOAP notes from GPT API (1-shot)

In [22]:
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "decile-gpt-35-turbo-16k"


In [23]:
targets = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge_target.csv.gz')

In [24]:
bhc_oneshot_sample = targets.sample(1)['brief_hospital_course'].squeeze()

In [44]:
message_text2 = [{"role":"system","content":f"You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information. Using the following brief hospital course as an example: {bhc_oneshot_sample}\n\nBegin with a brief overview of the patient's demographics, ED chief complaint, and past medical history. Then, write your hospital course summary by condition starting with a #."},]

message_text2.append(gpt_bhc_prompt)

print(f"Brief Hospital Course Prompt: {message_text2}")

Brief Hospital Course Prompt: [{'role': 'system', 'content': "You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information. Using the following brief hospital course as an example: Summary: ___ h/o IVDU, tobacco use who presents with 10 days of \nprogressive back pain. MRI, elevated CRP/ESR concerning for \nosteomyelitis. \n\n# Osteomyelitis L2/L3 \n# ___ intermittent weakness/numbness\nPatient was started on Vancomycin/Ceftriaxone x 1 at an outside \nhospital and transferred to ___ for neurosurgical evaluation. \nHis MRI showed cortical destruction on L2-3 with enhancing soft \ntissue with edema. No neurosurgical interventions indicated. \nAntibiotics were held until a CT guided bone biopsy on ___. \nPatient was started on Vancomycin/Cefepime on ___.  Cefepime \nwas given for concern of pseudomonas (use of water to clean

In [45]:
completion2 = openai.ChatCompletion.create(
  engine=engine,
  messages = message_text2,
)

In [46]:
one_shot_output = completion2['choices'][0]['message']['content']; print(one_shot_output)

# Infection and inflammatory reaction due to other urinary catheter, sepsis, secondary malignant neoplasm of bladder, type 2 diabetes mellitus without complications, chronic obstructive pulmonary disease, unspecified, urinary tract infection, site not specified, unspecified medical devices associated with adverse incidents, unspecified place in unspecified non-institutional (private) residence as the place of occurrence of the external cause, hyperlipidemia, unspecified, essential (primary) hypertension, atherosclerotic heart disease of native coronary artery without angina pectoris, personal history of other malignant neoplasm of kidney, benign prostatic hyperplasia without lower urinary tract symptoms, gastro-esophageal reflux disease without esophagitis, personal history of nicotine dependence, major depressive disorder, single episode, unspecified, and other chronic pain:

The patient, a 62-year-old male, presented to the ED with complaints of right flank pain. During his hospital 

## Compute ROUGE Scores from Output

In [48]:
from rouge_score import rouge_scorer


In [49]:
# discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()
pt_target = targets[targets['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['brief_hospital_course'].squeeze()

In [50]:
scorer = rouge_scorer.RougeScorer(['rouge1',  "rouge2", 'rougeL'], use_stemmer=True)
scores1 = scorer.score(zero_shot_output,
                      pt_target)

In [51]:
scores1

{'rouge1': Score(precision=0.4174757281553398, recall=0.2275132275132275, fmeasure=0.2945205479452055),
 'rouge2': Score(precision=0.02922077922077922, recall=0.015901060070671377, fmeasure=0.020594965675057208),
 'rougeL': Score(precision=0.15857605177993528, recall=0.08641975308641975, fmeasure=0.11187214611872147)}

In [52]:
scorer = rouge_scorer.RougeScorer(['rouge1', "rouge2", 'rougeL'], use_stemmer=True)
scores2 = scorer.score(one_shot_output,
                      pt_target)

In [53]:
scores2

{'rouge1': Score(precision=0.2621359223300971, recall=0.20149253731343283, fmeasure=0.22784810126582278),
 'rouge2': Score(precision=0.012987012987012988, recall=0.00997506234413965, fmeasure=0.011283497884344146),
 'rougeL': Score(precision=0.11003236245954692, recall=0.0845771144278607, fmeasure=0.09563994374120957)}

# Qualitative Eval

## Final outputs


In [54]:
pd.DataFrame.from_records([scores1, scores2], index=["zero-shot", "one-shot"]).to_csv(output3)

In [55]:
eval = pd.Series([one_shot_output, zero_shot_output, pt_target], index=["gpt-one-shot", "gpt-zero-shot", "gold-standard"])

In [56]:
def add_line_breaks(text):
    return text.replace(r'\n', r'<br>')

In [57]:
output_str = eval.to_frame().to_html(escape=False).replace(r"\n","<br>")
with open(output2, 'w') as file:
    file.write(output_str)
    


In [58]:
print(one_shot_output)

# Infection and inflammatory reaction due to other urinary catheter, sepsis, secondary malignant neoplasm of bladder, type 2 diabetes mellitus without complications, chronic obstructive pulmonary disease, unspecified, urinary tract infection, site not specified, unspecified medical devices associated with adverse incidents, unspecified place in unspecified non-institutional (private) residence as the place of occurrence of the external cause, hyperlipidemia, unspecified, essential (primary) hypertension, atherosclerotic heart disease of native coronary artery without angina pectoris, personal history of other malignant neoplasm of kidney, benign prostatic hyperplasia without lower urinary tract symptoms, gastro-esophageal reflux disease without esophagitis, personal history of nicotine dependence, major depressive disorder, single episode, unspecified, and other chronic pain:

The patient, a 62-year-old male, presented to the ED with complaints of right flank pain. During his hospital 

In [59]:
print(zero_shot_output)

Patient Summary:
Mr. [Name], a 62-year-old male, initially presented to the Emergency Department (ED) with chief complaints of right flank pain. He has a complex medical history, including past hospitalizations and treatments for numerous conditions.

Hospital Course by Condition:

# Infection and inflammatory reaction due to urinary catheter, initial encounter
The patient presented with an infection related to urinary catheter use. He was treated with appropriate antibiotics, and the urinary catheter was managed as per the infection control protocol.

# Sepsis, unspecified organism
The management in the ED and subsequent inpatient stay focused on stabilizing the patient's septic condition with appropriate intravenous fluids, antibiotics, and close monitoring.

# Secondary malignant neoplasm of bladder
Oncology consultation was sought, and further investigations and management plans for the bladder malignancy were scheduled post-discharge.

# Type 2 diabetes mellitus without complicati

In [60]:
print(pt_target)

___ yo M with hx COPD, renal ca, bladder cancer with chronic 
indwelling foley now presenting with flankpain and abnormal UA 
with VRE growing in urine cxs.

ACTIVE ISSUES
# Sepsis
# VRE UTI
Has chronic indwelling Foley due to known bladder cancer. 
Refused exchange of Foley, as noted this was done just prior to 
presentation at ___ (no documentation of this found in ___ 
record). Was non-toxic appearing and vitals stabilized on 
admission. Urine culture grew VRE. Treated with linezolid x7 
days (to continue at home) -- HELD escitalopram until ___ to 
avoid Serotonin syndrome. Patient counseled on this. 
Patient improved from sepsis rapidly, though WBC still slightly 
elevated. Clinically sepsis resolved, and infection improving. 
Patient very angered and believed was never seen by physician at 
___ (though documented 2 visits with MD). Demanded to leave 
hospital. Patient was medically stable for discharge; reviewed 
with patient medical plan on discharge. An Rx was generated for 
lin

## SOAP Notes

In [42]:
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[0])
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[1])


----------------------------------------------------------------------

S: The patient, a 62-year-old male, presented to the Emergency Department (ED) with complaints of right flank pain. Upon further evaluation and treatment, the following diagnoses were made: Infection and inflammatory reaction due to other urinary catheter, initial encounter, Sepsis, unspecified organism, Secondary malignant neoplasm of bladder, Type 2 diabetes mellitus without complications, Chronic obstructive pulmonary disease, unspecified, Urinary tract infection, site not specified, Unspecified medical devices associated with adverse incidents, Unspecified place in unspecified non-institutional (private) residence as the place of occurrence of the external cause, Hyperlipidemia, unspecified, Essential (primary) hypertension, Atherosclerotic heart disease of native coronary artery without angina pectoris, Personal history of other malignant neoplasm of kidney, Benign prostatic hyperplasia without lower urinary 

## Full Discharge Summary

In [43]:
print(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['text'].squeeze())

 
Name:  ___                   Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: MEDICINE
 
Allergies: 
terazosin / doxazosin / chocolate flavor / montelukast / 
tamsulosin / garlic
 
Attending: ___.
 
Chief Complaint:
flank pain
 
Major Surgical or Invasive Procedure:
None

 
History of Present Illness:
 Mr ___ is a ___ with history of
BPH, kidney cancer s/p L nephrectomy ___ years ago, known 
bladder
mets and recurrent UTIs with foley in place due to obstructive
uropathy from bladder masses who presents with ___ days R flank
and back pain extending to the R groin. No fevers or frank
pyuria. Intermittent hematuria. UCX drawn in the ED on ___
growing VRE sensitive only to linezolid.  Pt had not yet been
started on abx on arrival to the ED.  

In the ED, initial vitals were: 97.2 114 158/98 16 99% RA.  Labs
were notable for WBC 20.1, positive UA.   He was found to have
R-sided abd pain and CVA tenderness.  CTAP