In [None]:
# Default run parameters; the "# Parameters" cell below reassigns both names.
# soap_notes_fp: path of the CSV file written at the end of the notebook.
soap_notes_fp = "sample_output.csv"
# note_id: note_id of the discharge summary to generate SOAP notes for.
note_id = "10000032-DS-22"

In [None]:
# Parameters
# NOTE(review): this looks like a papermill-injected parameters cell — confirm.
# These assignments replace the default values set in the cell above.
soap_notes_fp = "soap_note_sample1test_10415772-DS-14.csv"
note_id = "10415772-DS-14"


In [None]:
# Echo the effective run parameters so they are recorded in the executed notebook.
print(f"soap_notes_fp = {soap_notes_fp} note_id = {note_id}")

# SOAP Note Generation

This notebook performs step 1 of the Brief Hospital Course pipeline: it generates a SOAP note for each service by passing a list of information to a GPT-3.5 model.

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [None]:
from dotenv import load_dotenv
# Take environment variables from .env; OPENAI_API_BASE and OPENAI_API_KEY are
# read with os.getenv further down in this notebook.
load_dotenv()

In [None]:
from preprocessing import *
from prompt_functions import create_pt_prompt_per_day, create_pt_prompt_per_service

## Read in Prompts

In [None]:
# Prompt templates. Later cells read the 'prompt_name' and 'prompt' columns
# (e.g. the row named 'SOAP_system_message').
prompts = pd.read_csv("soap_note_prompts.csv")

## Read in Radiology Reports

In [None]:
# Radiology reports from the challenge training split.
# NOTE(review): `radiology` is not referenced again in the visible cells — confirm it is still needed.
radiology = pd.read_csv("/gpfs/milgram/project/rtaylor/shared/DischargeMe/public/train/radiology.csv.gz")

## Read in Structured Data

In [None]:
# Root of the shared DischargeMe data and the two sub-directories read below.
# Defining them once means a relocated dataset needs a single edit; the
# resulting path strings are identical to the previously hard-coded ones.
DISCHARGE_ME_DIR = "/gpfs/milgram/project/rtaylor/shared/DischargeMe"
CHALLENGE_TRAIN_DIR = f"{DISCHARGE_ME_DIR}/public/train"
MIMIC_HOSP_DIR = f"{DISCHARGE_ME_DIR}/mimiciv/hosp"

######
# Challenge Data

# discharge summaries
discharges = pd.read_csv(f"{CHALLENGE_TRAIN_DIR}/discharge.csv.gz")

# ed stays
edstays = pd.read_csv(f"{CHALLENGE_TRAIN_DIR}/edstays.csv.gz")

# triage
triage = pd.read_csv(f"{CHALLENGE_TRAIN_DIR}/triage.csv.gz")

######
# MIMIC-IV Data
# NOTE: pd.read_pickle can execute arbitrary code while loading; only point
# these reads at pickle files from a trusted location.

# ward transfers
transfers = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_transfers.pkl")

# higher-level services (ICU, CARD, etc)
services = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_services.pkl")

# get patient info
pts = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_patients.pkl")

# admission demographics
admissions = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_admissions.pkl")

# procedures
procs = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_procedures_icd.pkl")

# diagnoses
diags = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_diagnoses_icd.pkl")

# meds
med_orders = pd.read_pickle(f"{MIMIC_HOSP_DIR}/cohort_emar.pkl")


In [None]:
# Remaining MIMIC-IV cohort tables: prescriptions, lab events and microbiology events.
# NOTE: pd.read_pickle can execute arbitrary code while loading; only read trusted files.
prescriptions = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_prescriptions.pkl')
labs = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_labevents.pkl')
microbio = pd.read_pickle('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/cohort_microbiologyevents.pkl')

### Clean up/type cast data

In [None]:
# Cast the hadm_id COLUMN to a numeric dtype when every value parses as a number.
# The previous `transfers.loc['hadm_id'] = ...` addressed a row label instead, so
# the column was never cast and a spurious row labelled 'hadm_id' was added.
try:
    transfers["hadm_id"] = pd.to_numeric(transfers["hadm_id"])
except (ValueError, TypeError):
    # Unparseable values: leave the column untouched, which is the outcome the
    # deprecated errors="ignore" option used to give.
    pass

In [None]:
# Keep only medication orders where chart time, medication and event text are all present.
required_med_cols = ['charttime', 'medication', 'event_txt']
has_required_fields = med_orders[required_med_cols].notna().all(axis=1)
med_orders = med_orders[has_required_fields]

In [None]:
# Convert the timestamp columns of procedures, medication orders and discharge
# summaries to pandas datetimes.
procs = procs.astype({"chartdate": "datetime64[ns]"})

med_orders = med_orders.astype(
    dict.fromkeys(("charttime", "scheduletime", "storetime"), "datetime64[ns]")
)

discharges = discharges.astype(
    dict.fromkeys(("charttime", "storetime"), "datetime64[ns]")
)

In [None]:
# Convert the ED stay start and end timestamps to pandas datetimes.
edstays = edstays.astype(
    dict.fromkeys(("intime", "outtime"), "datetime64[ns]")
)

In [None]:
# Convert the timestamp columns of prescriptions, labs and microbiology events
# to pandas datetimes.
prescriptions = prescriptions.astype({"starttime": "datetime64[ns]",
                                      "stoptime": "datetime64[ns]"})
labs = labs.astype({"charttime": "datetime64[ns]",
                    "storetime": "datetime64[ns]"})

# "charttime" used to appear twice in this dict literal; the duplicate key was
# redundant and has been removed.
microbio = microbio.astype({"chartdate": "datetime64[ns]",
                            "charttime": "datetime64[ns]",
                            "storetime": "datetime64[ns]"})

In [None]:
# generate free text sections of tables concatenating columns
# admin_text: "<medication> at <charttime>", with charttime rendered via strftime('%B %d, %Y, %r').
med_orders['admin_text'] = med_orders['medication'] + " at " + med_orders['charttime'].dt.strftime('%B %d, %Y, %r')
# TODO SORAYA: Add the right information in here that we actually want for the meds
# NOTE(review): unlike the labs/microbio text columns, no fillna("") is applied here, so a
# missing 'drug' or 'prod_strength' presumably yields a missing 'text' — confirm this is intended.
prescriptions['text'] = prescriptions['drug'] + " " + prescriptions['prod_strength']


In [None]:
# TODO THOMAS: figure out how to better encapsulate lab values
# Build "<category> <fluid> <label>: <value> <unit>", treating missing pieces as empty strings.
lab_category = labs['category'].fillna("")
lab_fluid = labs['fluid'].fillna("")
lab_label = labs['label'].fillna("")
lab_value = labs['value'].fillna("")
lab_unit = labs['valueuom'].fillna("")
labs['text'] = lab_category + " " + lab_fluid + " " + lab_label + ": " + lab_value + " " + lab_unit

In [None]:
# TODO SORAYA: check for those patients that don't have comments (they might not have useful information in the test)
# Build "<test_name>: <comments>", treating missing pieces as empty strings.
micro_test_name = microbio['test_name'].fillna("")
micro_comments = microbio['comments'].fillna("")
microbio['text'] = micro_test_name + ": " + micro_comments

## Generate Input Data List Prompt

In [None]:
# Select the discharge summary for the requested note_id.
# .squeeze() only collapses to a single record when exactly one row matches, so
# fail early with a clear message when the note is missing or duplicated
# instead of handing an unexpected DataFrame to the following cells.
note_matches = discharges[discharges['note_id'] == note_id]
if len(note_matches) != 1:
    raise ValueError(
        f"Expected exactly one discharge summary with note_id={note_id!r}, "
        f"found {len(note_matches)}"
    )
z = note_matches.squeeze()

In [None]:
# Show the selected discharge summary record for a visual sanity check.
display(z)

In [None]:
# Build the per-service prompt table for the selected discharge summary from the
# prompt templates and the structured tables loaded earlier. Later cells read the
# 'eventtype', 'careunit' and 'service_prompts' columns of the result.
transfers_with_soap_notes = create_pt_prompt_per_service(
    z,
    prompts,
    edstays,
    pts,
    triage,
    transfers,
    diags,
    procs,
    prescriptions,
    labs,
    microbio,
)

In [None]:
# Show the generated prompt table for inspection before any API calls are made.
display(transfers_with_soap_notes)

## Create SOAP notes from GPT API

In [None]:
# Configure the OpenAI client for an Azure deployment. The endpoint and key come
# from the environment, so no credentials are stored in the notebook.
# NOTE(review): module-level settings plus openai.ChatCompletion look like the
# pre-1.0 openai SDK interface — confirm the installed version.
import openai
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
# Passed as `engine=` to the ChatCompletion calls below.
engine = "decile-gpt-35-turbo-16k"


In [None]:
# drop all eventtypes of type: discharge, since we don't need a note for that.
# .copy() makes the result an independent frame, so adding the 'gpt_SOAP_note'
# column later does not write into a filtered slice (SettingWithCopyWarning).
transfers_with_soap_notes = transfers_with_soap_notes[transfers_with_soap_notes['eventtype'] != "discharge"].copy()

In [None]:
# The system-message template is the same for every row, so look it up once
# instead of re-querying `prompts` on each loop iteration.
system_template = prompts.loc[prompts['prompt_name'] == 'SOAP_system_message', "prompt"].squeeze()

# One chat message list per remaining transfer row: a system message with the
# {{ward}} placeholder replaced by the row's care unit, followed by the row's
# service prompt as the user message.
gpt_inputs = []
for _, row in transfers_with_soap_notes.iterrows():
    system_role = system_template.replace("{{ward}}", row['careunit'])
    gpt_inputs.append([
        {"role": "system", "content": system_role},
        {"role": "user", "content": row['service_prompts']},
    ])

In [None]:
# Request one chat completion per prepared message list and keep only the reply text.
# The loop variable is named `messages` rather than `input`, so the builtin
# input() is not shadowed for the rest of the kernel session.
completions = []
for messages in gpt_inputs:
    completion = openai.ChatCompletion.create(
        engine=engine,
        messages=messages,
    )
    completions.append(completion['choices'][0]['message']['content'])

In [None]:
# Attach the generated notes. `completions` follows the order of `gpt_inputs`, which
# was built by iterating over the rows of this same frame, so entries line up by position.
transfers_with_soap_notes['gpt_SOAP_note'] = completions

In [None]:
# Write the transfers together with their generated SOAP notes to the configured output path.
transfers_with_soap_notes.to_csv(soap_notes_fp)