In [1]:
soap_notes_fp = "sample_output1.csv"
bhc_html_fp = "eval.csv"
scores_fp = "scores.csv"

In [2]:
# Parameters
soap_notes_fp = "soap_note_sample1test_10415772-DS-14.csv"
bhc_html_fp = "sample1html_10415772-DS-14.html"
scores_fp = "sample1scores_10415772-DS-14.csv"


# Brief Hospital Course Generation

This notebook performs step 2 of the Brief Hospital Course pipeline, in which we generate a brief hospital course from a set of service-level SOAP notes passed into a GPT-3.5 model. 

In [3]:
%load_ext autoreload
%autoreload 2

In [7]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [5]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [6]:
from prompt_functions import create_brief_hospital_course_prompts
from openai_utils import num_tokens_from_string, completion_with_backoff

In [7]:
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
# engine = "decile-gpt-35-turbo-16k"
engine = "decile-gpt-4-128K"


## Read in GPT-Generated SOAP Notes

In [8]:
soap_notes = pd.read_csv(soap_notes_fp)

## Read in Prompts

In [9]:
prompts = pd.read_csv("bhc_prompts.csv")

In [10]:
prompts

Unnamed: 0,prompt_name,prompt
0,bhc_prompt_zero_shot,___ is a {age} year old {sex} presenting to th...
1,bhc_prompt_n_shot,___ is a {age} year old {sex} presenting to th...
2,bhc_system_message,You are the discharging physician that is revi...


## Read in Radiology Reports

In [11]:
radiology = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/radiology.csv.gz")

## Read in Encounter-Level Structured Data

In [12]:
###### 
# Challenge Data

# discharge summaries
discharges = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge.csv.gz")

# ed stays
edstays = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/edstays.csv.gz')

# triage
triage = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/triage.csv.gz')

###### 
# MIMIC-IV Data

# ward transfers
transfers = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_transfers.pkl')

# higher-level services (ICU, CARD, etc)
services = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_services.pkl')

# get patient info
pts = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_patients.pkl')

# admission demographics
admissions = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_admissions.pkl')

In [13]:
# diagnoses
diags = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_diagnoses_icd.pkl')

### Clean up/type cast data

In [14]:
discharges = discharges.astype({"charttime":"datetime64[ns]",
                               "storetime":"datetime64[ns]"})

## Generate SOAP Note List Prompt

In [15]:
soap_notes

Unnamed: 0.1,Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime,service_prompts,gpt_SOAP_note
0,76105,10415772.0,20648185.0,35979851.0,ED,Emergency Department,2180-10-31 11:42:00,2180-10-31 20:10:00,___ is a 69 year old F that initially presente...,"S: The patient, ___, is a 69-year-old female w..."
1,76104,10415772.0,20648185.0,34016963.0,admit,Med/Surg,2180-10-31 20:10:00,2180-11-01 16:36:57,___ is a 69 year old F that initially presente...,"S: The patient, ___, is a 69-year-old female w..."


In [16]:
discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()

note_id                                          10415772-DS-14
subject_id                                             10415772
hadm_id                                                20648185
note_type                                                    DS
note_seq                                                     14
charttime                                   2180-11-01 00:00:00
storetime                                   2180-11-01 16:42:00
text           \nName:  ___                    Unit No:   __...
Name: 2640, dtype: object

## Create SOAP notes from GPT API (0-shot)

In [17]:
bhc_prompt_zeroshot = create_brief_hospital_course_prompts(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze(),
                                                  soap_notes, prompts, 
                                                  edstays, radiology, 
                                                  pts, triage, transfers, diags,
                                                  shots=None
                                                 )

In [18]:
num_tokens_from_string(bhc_prompt_zeroshot, "cl100k_base")

1734

In [19]:
messages = [{"role":"system","content":prompts.loc[prompts['prompt_name'] == "bhc_system_message", "prompt"].squeeze()}]

gpt_bhc_prompt = {"role":"user",
                 "content":bhc_prompt_zeroshot}

messages.append(gpt_bhc_prompt)

print(f"Brief Hospital Course 1-shot Prompt: {messages}")

Brief Hospital Course 1-shot Prompt: [{'role': 'system', 'content': "You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information."}, {'role': 'user', 'content': '___ is a {age} year old {sex} presenting to the ED with ETOH. The patient visited the following wards in order: Emergency Department, Med/Surg. Over the course of their hospital stay, ___ was given a set of diagnoses, radiology reports from consulting physicians, and a set of daily SOAP notes. Please use these SOAP notes to create a brief hospital course summary. Refer to the patient by three underscores (___). The structure of the brief hospital course summary should be:\r\n\r\n"""\r\n[patient introduction including age, sex, chief complaints, and relevant past medical history]\r\n\r\n# Condition 1: medications, vitals, abnormal labs and cultures, and procedure

In [20]:
zeroshot_completion = completion_with_backoff(engine=engine,
                        messages=messages,
                        )


In [21]:
zero_shot_output = zeroshot_completion['choices'][0]['message']['content']; print(zero_shot_output)

"""
___ is a 69-year-old female presenting to the ED with acute alcoholic intoxication. Her past medical history includes alcoholism, chronic airway obstruction consistent with underlying emphysema, personal history of tobacco use, and an unspecified accident.

# Condition 1: Acute Alcoholic Intoxication and Alcohol Withdrawal
Medications: Diazepam 5 mg was administered for alcohol withdrawal. The patient was also provided with nutritional supplements including Thiamine 100mg/mL, Folic Acid 5mg/mL, and a multivitamin IV.
Vitals & Labs: Regular monitoring of vital signs was conducted, though specific values were not provided.
Procedures: No significant procedures were performed during this time.

# Condition 2: Chronic Airway Obstruction and Hypoxemia
Medications: Fluticasone-Salmeterol Diskus (250/50) 250/50mcg and Albuterol 0.083% Neb Soln were administered for chronic airway obstruction.
Vitals: The patient was dependent on supplemental oxygen due to hypoxemia.
Labs: No specific lab 

## Create SOAP notes from GPT API (2-shot)

It's really one shot I'm just testing two of the same example

In [8]:
targets = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge_target.csv.gz')

In [23]:
bhc_oneshot_sample = targets.sample(1)['brief_hospital_course'].squeeze()

In [24]:
bhc_prompt_2shot = create_brief_hospital_course_prompts(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze(),
                                                  soap_notes, prompts, 
                                                  edstays, radiology, 
                                                  pts, triage, transfers, diags,
                                                  shots=[bhc_oneshot_sample, bhc_oneshot_sample]
                                                 )

In [25]:
num_tokens_from_string(bhc_prompt_2shot, "cl100k_base")

2473

In [26]:
messages = [{"role":"system","content":prompts.loc[prompts['prompt_name'] == "bhc_system_message", "prompt"].squeeze()}]

gpt_bhc_prompt = {"role":"user",
                 "content":bhc_prompt_2shot}

messages.append(gpt_bhc_prompt)

print(f"Brief Hospital Course N-shot Prompt: {messages}")

Brief Hospital Course N-shot Prompt: [{'role': 'system', 'content': "You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information."}, {'role': 'user', 'content': '___ is a {age} year old {sex} presenting to the ED with ETOH. The patient visited the following wards in order: Emergency Department, Med/Surg. Over the course of their hospital stay, ___ was given a set of diagnoses, radiology reports from consulting physicians, and a set of daily SOAP notes. Please use these SOAP notes to create a brief hospital course summary. Refer to the patient by three underscores (___). The structure of the brief hospital course summary should be:\r\n\r\n"""\r\n[patient introduction including age, sex, chief complaints, and relevant past medical history]\r\n\r\n# Condition 1: medications, vitals, abnormal labs and cultures, and procedure

In [27]:
nshot_completion = completion_with_backoff(engine=engine,
                        messages=messages,
                        )


In [28]:
n_shot_output = nshot_completion['choices'][0]['message']['content']; print(n_shot_output)

___ is a 69-year-old female who presented to the ED with acute alcoholic intoxication and subsequent alcohol withdrawal. Her past medical history is significant for other specified cardiac dysrhythmias, hypoxemia, an open wound of the back, chronic airway obstruction likely secondary to smoking, and was found to be dependent on supplemental oxygen. She suffered an unspecified accident and has a personal history of tobacco use.

# Acute alcoholic intoxication and alcohol withdrawal: 
Medications administered included Fluticasone-Salmeterol Diskus for chronic airway obstruction and Heparin for prophylaxis against possible thromboembolic events. To manage alcohol withdrawal, Diazepam 5 mg was administered orally. She did not undergo any specific procedures for these conditions in the ED. Monitoring in the ED included observation of vital signs, cardiac rhythm, wound care, and spirometry.

# Cardiac dysrhythmias and hypoxemia: 
The patient received Albuterol 0.083% nebulization and supplem

In [29]:
n_shot_output

"___ is a 69-year-old female who presented to the ED with acute alcoholic intoxication and subsequent alcohol withdrawal. Her past medical history is significant for other specified cardiac dysrhythmias, hypoxemia, an open wound of the back, chronic airway obstruction likely secondary to smoking, and was found to be dependent on supplemental oxygen. She suffered an unspecified accident and has a personal history of tobacco use.\n\n# Acute alcoholic intoxication and alcohol withdrawal: \nMedications administered included Fluticasone-Salmeterol Diskus for chronic airway obstruction and Heparin for prophylaxis against possible thromboembolic events. To manage alcohol withdrawal, Diazepam 5 mg was administered orally. She did not undergo any specific procedures for these conditions in the ED. Monitoring in the ED included observation of vital signs, cardiac rhythm, wound care, and spirometry.\n\n# Cardiac dysrhythmias and hypoxemia: \nThe patient received Albuterol 0.083% nebulization and 

## Compute ROUGE Scores from Output

In [2]:
from rouge_score import rouge_scorer


In [31]:
# discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()
pt_target = targets[targets['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['brief_hospital_course'].squeeze()

In [32]:
scorer = rouge_scorer.RougeScorer(['rouge1',  "rouge2", 'rougeL'], use_stemmer=True)
scores1 = scorer.score(zero_shot_output,
                      pt_target)

In [33]:
scores1

{'rouge1': Score(precision=0.6666666666666666, recall=0.022988505747126436, fmeasure=0.044444444444444446),
 'rouge2': Score(precision=0.18181818181818182, recall=0.005763688760806916, fmeasure=0.011173184357541898),
 'rougeL': Score(precision=0.5833333333333334, recall=0.020114942528735632, fmeasure=0.03888888888888889)}

In [34]:
scorer = rouge_scorer.RougeScorer(['rouge1', "rouge2", 'rougeL'], use_stemmer=True)
scores2 = scorer.score(n_shot_output,
                      pt_target)

In [35]:
scores2

{'rouge1': Score(precision=0.6666666666666666, recall=0.02203856749311295, fmeasure=0.042666666666666665),
 'rouge2': Score(precision=0.18181818181818182, recall=0.0055248618784530384, fmeasure=0.010723860589812331),
 'rougeL': Score(precision=0.5, recall=0.01652892561983471, fmeasure=0.03200000000000001)}

# Qualitative Eval

## Final outputs


In [36]:
pd.DataFrame.from_records([scores1, scores2], index=["zero-shot", "2-shot"]).to_csv(scores_fp)

In [37]:
eval = pd.Series([n_shot_output, zero_shot_output, pt_target], index=["gpt-2-shot", "gpt-zero-shot", "gold-standard"])

In [38]:
def add_line_breaks(text):
    return text.replace(r'\n', r'<br>')

In [39]:
output_str = eval.to_frame().to_html(escape=False).replace(r"\n","<br>")
with open(bhc_html_fp, 'w') as file:
    file.write(output_str)
    


In [40]:
print(n_shot_output)

___ is a 69-year-old female who presented to the ED with acute alcoholic intoxication and subsequent alcohol withdrawal. Her past medical history is significant for other specified cardiac dysrhythmias, hypoxemia, an open wound of the back, chronic airway obstruction likely secondary to smoking, and was found to be dependent on supplemental oxygen. She suffered an unspecified accident and has a personal history of tobacco use.

# Acute alcoholic intoxication and alcohol withdrawal: 
Medications administered included Fluticasone-Salmeterol Diskus for chronic airway obstruction and Heparin for prophylaxis against possible thromboembolic events. To manage alcohol withdrawal, Diazepam 5 mg was administered orally. She did not undergo any specific procedures for these conditions in the ED. Monitoring in the ED included observation of vital signs, cardiac rhythm, wound care, and spirometry.

# Cardiac dysrhythmias and hypoxemia: 
The patient received Albuterol 0.083% nebulization and supplem

In [41]:
print(zero_shot_output)

"""
___ is a 69-year-old female presenting to the ED with acute alcoholic intoxication. Her past medical history includes alcoholism, chronic airway obstruction consistent with underlying emphysema, personal history of tobacco use, and an unspecified accident.

# Condition 1: Acute Alcoholic Intoxication and Alcohol Withdrawal
Medications: Diazepam 5 mg was administered for alcohol withdrawal. The patient was also provided with nutritional supplements including Thiamine 100mg/mL, Folic Acid 5mg/mL, and a multivitamin IV.
Vitals & Labs: Regular monitoring of vital signs was conducted, though specific values were not provided.
Procedures: No significant procedures were performed during this time.

# Condition 2: Chronic Airway Obstruction and Hypoxemia
Medications: Fluticasone-Salmeterol Diskus (250/50) 250/50mcg and Albuterol 0.083% Neb Soln were administered for chronic airway obstruction.
Vitals: The patient was dependent on supplemental oxygen due to hypoxemia.
Labs: No specific lab 

In [42]:
print(pt_target)

___ year-old woman with COPD on home O2 presenting with alcohol 
intoxication.  


## SOAP Notes

In [43]:
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[0])
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[1])


----------------------------------------------------------------------

S: The patient, ___, is a 69-year-old female who was initially admitted to the Emergency Department (ED) due to acute alcoholic intoxication and alcohol withdrawal. During her hospital stay in the ED, she was also diagnosed with other conditions including other specified cardiac dysrhythmias, hypoxemia, open wound of the back, nonspecific abnormal finding of lung field, chronic airway obstruction, other dependence on machines (supplemental oxygen), personal history of tobacco use, and unspecified accident.

O: No procedures were performed during this part of the patient's hospital course.

During this time, the patient was initiated on Fluticasone-Salmeterol Diskus (250/50) 250/50mcg Diskus for chronic airway obstruction. Additionally, Heparin 5000 Units/mL- 1mL vial was administered, possibly due to the patient being at risk for thromboembolic events.

No abnormal labs or microbiology cultures were documented dur

## Full Discharge Summary

In [44]:
print(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['text'].squeeze())

 
Name:  ___                    Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   F
 
Service: MEDICINE
 
Allergies: 
No Known Allergies / Adverse Drug Reactions
 
Attending: ___.
 
Chief Complaint:
Alcohol intoxication
 
Major Surgical or Invasive Procedure:
None

 
History of Present Illness:
Ms. ___ ___ year-old woman with a history of COPD 4L home O2 
requirement at night presenting with acute alcohol intoxication. 
She was brought in by EMS for being out on the streets and was 
having difficulty ambulating. She states that she did not to 
leave home with her oxygen because she wanted to go out "to get 
some booze". She states that she had about half a pint of vodka 
today. Of note, she has a recent medical ICU admission for 
hypoxemia. She denies any evidence of trauma and has no 
complaints, denies any head, neck, back pain.  
On arrival to the ED, initial vitals were 99.8 117 126/61 89% 
RA. Labs were notable for WB