<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [26]</a>'.</span>

In [1]:
soap_notes_fp = "sample_output.csv"
note_id = "10000032-DS-22"

In [2]:
# Parameters
soap_notes_fp = "soap_note_sample1test_10415772-DS-14.csv"
note_id = "10415772-DS-14"


In [3]:
print("soap_notes_fp =", soap_notes_fp, "note_id =", note_id)

soap_notes_fp = soap_note_sample1test_10415772-DS-14.csv note_id = 10415772-DS-14


# SOAP Note Generation

This notebook performs step 1 of the Brief Hospital Course pipeline, in which we generate a SOAP note for each service based on a list of information that has been passed into a GPT-3.5 model. 

In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [6]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [7]:
from preprocessing import *
from prompt_functions import create_pt_prompt_per_day, create_pt_prompt_per_service

## Read in Prompts

In [8]:
prompts = pd.read_csv("soap_note_prompts.csv")

## Read in Radiology Reports

In [9]:
radiology = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/radiology.csv.gz")

## Read in Structured Data

In [10]:
###### 
# Challenge Data

# discharge summaries
discharges = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/discharge.csv.gz")

# ed stays
edstays = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/edstays.csv.gz')

# triage
triage = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/triage.csv.gz')

###### 
# MIMIC-IV Data

# ward transfers
transfers = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_transfers.pkl')

# higher-level services (ICU, CARD, etc)
services = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_services.pkl')

# get patient info
pts = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_patients.pkl')

# admission demographics
admissions = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_admissions.pkl')

# procedures
procs = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_procedures_icd.pkl')

# diagnoses
diags = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_diagnoses_icd.pkl')

# meds
med_orders = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_emar.pkl')


In [11]:
prescriptions = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_prescriptions.pkl')
labs = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_labevents.pkl')
microbio = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_microbiologyevents.pkl')

### Clean up/type cast data

In [12]:
transfers.loc['hadm_id'] = pd.to_numeric(transfers["hadm_id"], errors="ignore")

  transfers.loc['hadm_id'] = pd.to_numeric(transfers["hadm_id"], errors="ignore")


In [13]:
med_orders = med_orders[~med_orders['charttime'].isna() & 
                        ~med_orders['medication'].isna() & 
                        ~med_orders['event_txt'].isna()]

In [14]:
procs = procs.astype({"chartdate":"datetime64[ns]"})
med_orders = med_orders.astype({"charttime":"datetime64[ns]",
                  "scheduletime":"datetime64[ns]",
                  "storetime":"datetime64[ns]",})

discharges = discharges.astype({"charttime":"datetime64[ns]",
                               "storetime":"datetime64[ns]"})

In [15]:
edstays = edstays.astype({"intime":"datetime64[ns]",
                         "outtime":"datetime64[ns]"})

In [16]:
prescriptions = prescriptions.astype({"starttime":"datetime64[ns]",
                      "stoptime":"datetime64[ns]"})
labs = labs.astype({"charttime":"datetime64[ns]",
                      "storetime":"datetime64[ns]"})

microbio = microbio.astype({"chartdate":"datetime64[ns]",
                      "charttime":"datetime64[ns]",
                           "storetime":"datetime64[ns]",
                      "charttime":"datetime64[ns]"})

In [17]:
# generate free text sections of tables concatenating columns
med_orders['admin_text'] = med_orders['medication'] + " at " + med_orders['charttime'].dt.strftime('%B %d, %Y, %r')
# TODO SORAYA: Add the right information in here that we actually want for the meds
prescriptions['text'] = prescriptions['drug'] + " " + prescriptions['prod_strength']


In [18]:
# TODO THOMAS: figure out how to better encapsulate lab values
labs['text'] = (labs['category'].fillna("") + " " + labs['fluid'].fillna("") + " " 
                + labs['label'].fillna("") + ": " + labs['value'].fillna("") + " " 
                + labs['valueuom'].fillna(""))

In [19]:
# TODO SORAYA: check for those patients that don't have comments (they might not have useful information in the test)
microbio['text'] = microbio['test_name'].fillna("") + ": " + microbio['comments'].fillna("")

## Generate Input Data List Prompt

In [20]:
z = discharges[discharges['note_id'] == note_id].squeeze()

In [21]:
display(z)

note_id                                          10415772-DS-14
subject_id                                             10415772
hadm_id                                                20648185
note_type                                                    DS
note_seq                                                     14
charttime                                   2180-11-01 00:00:00
storetime                                   2180-11-01 16:42:00
text           \nName:  ___                    Unit No:   __...
Name: 2640, dtype: object

In [22]:
service_prompts, care_units = create_pt_prompt_per_service(z, prompts, 
                                               edstays, 
                                               pts, triage, transfers, diags, procs, prescriptions, labs, microbio)

In [23]:
print(service_prompts)
print(care_units)

["___ is a 69 year old F that initially presented to the ED with ETOH. By the end of the hospital stay, ___ was given the following diagnoses in order of importance to this admission: Acute alcoholic intoxication in alcoholism, unspecified, Alcohol withdrawal, Other specified cardiac dysrhythmias, Hypoxemia, Open wound of back, without mention of complication, Other nonspecific abnormal finding of lung field, Chronic airway obstruction, not elsewhere classified, Other dependence on machines, supplemental oxygen, Personal history of tobacco use, Unspecified accident, Accidents occurring in unspecified place.A full list of the patient's procedures, medications, labs, and lab cultures is given below. Procedures are ordered by priority and medications are ordered chronologically. Given this information, please generate a SOAP progress note for this patient for their care during this part of their hospital course staying in the Emergency Department ward. Be SPECIFIC to the conditions, any a

## Create SOAP notes from GPT API

In [24]:
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "decile-gpt-35-turbo-16k"


In [25]:
# drop all eventtypes of type: discharge, since we don't need a note for that
# service_prompts = service_prompts[service_prompts['eventtype'] != "discharge"]

# we assume the last one is discharge and drop anything that's an empty string: 
service_prompts = [prompt for prompt in service_prompts if prompt]

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [26]:
gpt_inputs = []
for service_prompt, care_unit in zip(service_prompts, care_units):
    system_role = prompts.loc[prompts['prompt_name'] == 'SOAP_system_message', "prompt"].squeeze()
    system_role = system_role.replace(r"{{ward}}", care_unit)
    message_text = [{"role":"system","content":system_role}]
    
    gpt_service_prompt = {"role":"user",
                     "content":service_prompt}
    
    message_text.append(gpt_service_prompt)
    
    # print(f"Deprescribe Prompt: {message_text}")
    gpt_inputs.append(message_text)

NameError: name 'careunit' is not defined

In [None]:
completions = []
for input in gpt_inputs:
    completion = openai.ChatCompletion.create(
      engine=engine,
      messages = input,
    )
    completions.append(completion['choices'][0]['message']['content'])

In [None]:
service_prompts['gpt_SOAP_note'] = completions

In [None]:
service_prompts.to_csv(soap_notes_fp)