In [1]:
soap_notes_fp = "sample_output1.csv"
bhc_html_fp = "eval.csv"
scores_fp = "scores.csv"

# Brief Hospital Course Generation

This notebook performs step 2 of the Brief Hospital Course pipeline, in which we generate a brief hospital course from a set of service-level SOAP notes passed into a GPT-3.5 model. 

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [4]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [123]:
from prompt_functions import create_brief_hospital_course_prompts
from openai_utils import num_tokens_from_string, completion_with_backoff

In [118]:
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
# engine = "decile-gpt-35-turbo-16k"
engine = "decile-gpt-4-128K"


## Read in GPT-Generated SOAP Notes

In [6]:
soap_notes = pd.read_csv(soap_notes_fp)

## Read in Prompts

In [63]:
prompts = pd.read_csv("bhc_prompts.csv")

In [8]:
prompts

Unnamed: 0,prompt_name,prompt
0,bhc_prompt_zero_shot,___ is a {age} year old {sex} presenting to th...
1,bhc_prompt_n_shot,___ is a {age} year old {sex} presenting to th...
2,bhc_system_message,You are the discharging physician that is revi...


## Read in Radiology Reports

In [9]:
radiology = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/radiology.csv.gz")

## Read in Encounter-Level Structured Data

In [10]:
###### 
# Challenge Data

# discharge summaries
discharges = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge.csv.gz")

# ed stays
edstays = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/edstays.csv.gz')

# triage
triage = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/triage.csv.gz')

###### 
# MIMIC-IV Data

# ward transfers
transfers = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_transfers.pkl')

# higher-level services (ICU, CARD, etc)
services = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_services.pkl')

# get patient info
pts = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_patients.pkl')

# admission demographics
admissions = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_admissions.pkl')

In [11]:
# diagnoses
diags = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_diagnoses_icd.pkl')

### Clean up/type cast data

In [12]:
discharges = discharges.astype({"charttime":"datetime64[ns]",
                               "storetime":"datetime64[ns]"})

## Generate SOAP Note List Prompt

In [13]:
soap_notes

Unnamed: 0.1,Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime,service_prompts,gpt_SOAP_note
0,1864269,19860951,25275183.0,31332268,ED,Emergency Department,2140-12-29 14:28:00,2140-12-29 15:47:23,___ is a 40 year old M that initially presente...,"Subjective:\nThe patient, a 40-year-old male, ..."
1,1864271,19860951,25275183.0,31753599,admit,Emergency Department Observation,2140-12-29 15:47:23,2140-12-29 19:25:01,___ is a 40 year old M that initially presente...,Subjective:\n___ is a 40-year-old male who was...
2,1864270,19860951,25275183.0,31727926,transfer,Medicine,2140-12-29 19:25:01,2140-12-29 20:21:48,___ is a 40 year old M that initially presente...,"Subjective:\nThe patient, ___, is a 40-year-ol..."
3,1864274,19860951,25275183.0,39254309,transfer,Discharge Lounge,2140-12-29 20:21:48,2140-12-29 20:24:37,___ is a 40 year old M that initially presente...,Patient Name: [Patient Name]\nAge: 40 years\nS...
4,1864275,19860951,25275183.0,39666777,transfer,Medicine,2140-12-29 20:24:37,2140-12-29 20:25:41,___ is a 40 year old M that initially presente...,"Subjective:\nThe patient, a 40-year-old male, ..."
5,1864268,19860951,25275183.0,30264230,transfer,Emergency Department Observation,2140-12-29 20:25:41,2140-12-30 11:09:15,___ is a 40 year old M that initially presente...,S: The patient is a 40-year-old male who prese...
6,1864272,19860951,25275183.0,36104314,transfer,Medicine,2140-12-30 11:09:15,2141-01-01 18:30:04,___ is a 40 year old M that initially presente...,"Subjective:\nThe patient, a 40-year-old male, ..."


In [14]:
discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()

note_id                                          19860951-DS-14
subject_id                                             19860951
hadm_id                                                25275183
note_type                                                    DS
note_seq                                                     14
charttime                                   2141-01-01 00:00:00
storetime                                   2141-01-01 20:55:00
text           \nName:  ___            Unit No:   ___\n \nAd...
Name: 67858, dtype: object

## Create SOAP notes from GPT API (0-shot)

In [129]:
bhc_prompt_zeroshot = create_brief_hospital_course_prompts(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze(),
                                                  soap_notes, prompts, 
                                                  edstays, radiology, 
                                                  pts, triage, transfers, diags,
                                                  shots=None
                                                 )

In [130]:
num_tokens_from_string(bhc_prompt_zeroshot, "cl100k_base")

7068

In [131]:
messages = [{"role":"system","content":prompts.loc[prompts['prompt_name'] == "bhc_system_message", "prompt"].squeeze()}]

gpt_bhc_prompt = {"role":"user",
                 "content":bhc_prompt_zeroshot}

messages.append(gpt_bhc_prompt)

print(f"Brief Hospital Course 1-shot Prompt: {messages}")

Brief Hospital Course 1-shot Prompt: [{'role': 'system', 'content': "You are the discharging physician that is reviewing a patient's SOAP notes over their hospital course, starting in the ED and being discharged from your unit and writing a brief hospital course summary based on this information."}, {'role': 'user', 'content': '___ is a {age} year old {sex} presenting to the ED with Hyperkalemia. The patient visited the following wards in order: Emergency Department, Emergency Department Observation, Medicine, Discharge Lounge, Medicine, Emergency Department Observation, Medicine. Over the course of their hospital stay, ___ was given a set of diagnoses, radiology reports from consulting physicians, and a set of daily SOAP notes. Please use these SOAP notes to create a brief hospital course summary. Refer to the patient by three underscores (___). The structure of the brief hospital course summary should be:\r\n\r\n"""\r\n[patient introduction including age, sex, chief complaints, and r

In [132]:
zeroshot_completion = completion_with_backoff(engine=engine,
                        messages=message_text,
                        )


In [133]:
zero_shot_output = zeroshot_completion['choices'][0]['message']['content']; print(zero_shot_output)

To create a summary of the patient's hospital course based on the SOAP notes from the Emergency Department (ED) to discharge, the following framework would be used, highlighting the essential information throughout the patient's stay. 

Subjective: This section will summarize the patient's primary complaints, history of the presenting illness, any relevant personal or family history, and other pertinent information the patient provided upon admission and during their stay.

Objective: This part will synthesize findings from physical exams, vital signs, laboratory results, imaging studies, and any other diagnostic tests performed.

Assessment: Here, I will summarize the diagnoses or differentials considered throughout the patient's hospital course, including how the patient’s condition evolved over time and responses to treatments or interventions.

Plan: This section will outline the treatments administered, including medications, surgical procedures if any, therapeutic interventions, 

## Create SOAP notes from GPT API (1-shot)

In [134]:
targets = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge_target.csv.gz')

In [138]:
bhc_oneshot_sample = targets.sample(1)['brief_hospital_course'].squeeze()

In [139]:
bhc_prompt_nshot = create_brief_hospital_course_prompts(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze(),
                                                  soap_notes, prompts, 
                                                  edstays, radiology, 
                                                  pts, triage, transfers, diags,
                                                  shots=[bhc_oneshot_sample, bhc_oneshot_sample]
                                                 )

In [140]:
num_tokens_from_string(bhc_prompt_nshot, "cl100k_base")

7783

In [141]:
messages = [{"role":"system","content":prompts.loc[prompts['prompt_name'] == "bhc_system_message", "prompt"].squeeze()}]

gpt_bhc_prompt = {"role":"user",
                 "content":bhc_prompt_nshot}

messages.append(gpt_bhc_prompt)

print(f"Brief Hospital Course N-shot Prompt: {messages}")



In [142]:
nshot_completion = completion_with_backoff(engine=engine,
                        messages=message_text,
                        )


In [143]:
n_shot_output = nshot_completion['choices'][0]['message']['content']; print(n_shot_output)

Certainly. Below is an example of how one might construct a brief hospital course summary based on a review of a patient's SOAP (Subjective, Objective, Assessment, Plan) notes from their emergency department (ED) admission to discharge:

---

Patient Name: [Patient's Name]
MRN: [Medical Record Number]
Admission Date: [Admission Date]
Discharge Date: [Discharge Date]

**Hospital Course Summary:**

**ED Presentation:**
Subjective: Pt presented to the ED with complaints of acute onset of chest pain, shortness of breath, and diaphoresis starting approximately 2 hours prior to arrival.

Objective: Initial vitals showed BP 160/90, HR 110, O2 sat 90% on room air. ECG demonstrated ST elevations in anterior leads. Troponin level was elevated.

Assessment: Acute ST-Elevation Myocardial Infarction (STEMI).

Plan: Pt was given aspirin, nitroglycerin, and started on a heparin drip. Emergent cardiac catheterization was performed revealing significant LAD blockage which was successfully stented.

**H

In [144]:
n_shot_output

"Certainly. Below is an example of how one might construct a brief hospital course summary based on a review of a patient's SOAP (Subjective, Objective, Assessment, Plan) notes from their emergency department (ED) admission to discharge:\n\n---\n\nPatient Name: [Patient's Name]\nMRN: [Medical Record Number]\nAdmission Date: [Admission Date]\nDischarge Date: [Discharge Date]\n\n**Hospital Course Summary:**\n\n**ED Presentation:**\nSubjective: Pt presented to the ED with complaints of acute onset of chest pain, shortness of breath, and diaphoresis starting approximately 2 hours prior to arrival.\n\nObjective: Initial vitals showed BP 160/90, HR 110, O2 sat 90% on room air. ECG demonstrated ST elevations in anterior leads. Troponin level was elevated.\n\nAssessment: Acute ST-Elevation Myocardial Infarction (STEMI).\n\nPlan: Pt was given aspirin, nitroglycerin, and started on a heparin drip. Emergent cardiac catheterization was performed revealing significant LAD blockage which was success

## Compute ROUGE Scores from Output

In [145]:
from rouge_score import rouge_scorer


In [146]:
# discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']].squeeze()
pt_target = targets[targets['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['brief_hospital_course'].squeeze()

In [147]:
scorer = rouge_scorer.RougeScorer(['rouge1',  "rouge2", 'rougeL'], use_stemmer=True)
scores1 = scorer.score(zero_shot_output,
                      pt_target)

In [148]:
scores1

{'rouge1': Score(precision=0.3380952380952381, recall=0.2508833922261484, fmeasure=0.2880324543610548),
 'rouge2': Score(precision=0.02386634844868735, recall=0.017699115044247787, fmeasure=0.020325203252032523),
 'rougeL': Score(precision=0.14523809523809525, recall=0.10777385159010601, fmeasure=0.12373225152129817)}

In [149]:
scorer = rouge_scorer.RougeScorer(['rouge1', "rouge2", 'rougeL'], use_stemmer=True)
scores2 = scorer.score(n_shot_output,
                      pt_target)

In [150]:
scores2

{'rouge1': Score(precision=0.30714285714285716, recall=0.3265822784810127, fmeasure=0.31656441717791417),
 'rouge2': Score(precision=0.045346062052505964, recall=0.048223350253807105, fmeasure=0.046740467404674045),
 'rougeL': Score(precision=0.11904761904761904, recall=0.12658227848101267, fmeasure=0.12269938650306748)}

# Qualitative Eval

## Final outputs


In [None]:
pd.DataFrame.from_records([scores1, scores2], index=["zero-shot", "one-shot"]).to_csv(output3)

In [None]:
eval = pd.Series([one_shot_output, zero_shot_output, pt_target], index=["gpt-one-shot", "gpt-zero-shot", "gold-standard"])

In [None]:
def add_line_breaks(text):
    return text.replace(r'\n', r'<br>')

In [None]:
output_str = eval.to_frame().to_html(escape=False).replace(r"\n","<br>")
with open(output2, 'w') as file:
    file.write(output_str)
    


In [None]:
print(one_shot_output)

In [None]:
print(zero_shot_output)

In [None]:
print(pt_target)

## SOAP Notes

In [None]:
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[0])
print("\n----------------------------------------------------------------------\n")
print(soap_notes['gpt_SOAP_note'].tolist()[1])

## Full Discharge Summary

In [None]:
print(discharges[discharges['hadm_id'] == soap_notes.iloc[0]['hadm_id']]['text'].squeeze())