# SOAP Note Generation

This notebook performs step 1 of the Brief Hospital Course pipeline: for each service, we generate a SOAP note from a list of patient information that is passed to a GPT-3.5 model.

In [1]:
import pandas as pd
import ast
import numpy as np

from tqdm import tqdm
# from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()

import os
import openai

In [2]:
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
load_dotenv()  # take environment variables from .env.

True

## Read in Data

In [3]:
# Root of the shared DischargeMe data. Defaults to the cluster location used so
# far; set DISCHARGEME_ROOT (for example in .env) to run against another copy.
DATA_ROOT = os.environ.get("DISCHARGEME_ROOT", "/gpfs/milgram/project/rtaylor/shared/DischargeMe")
TRAIN_DIR = os.path.join(DATA_ROOT, "public", "train")
HOSP_DIR = os.path.join(DATA_ROOT, "mimiciv", "hosp")

# discharge summaries
discharges = pd.read_csv(os.path.join(TRAIN_DIR, "discharge.csv.gz"))

# ed stays
edstays = pd.read_csv(os.path.join(TRAIN_DIR, "edstays.csv.gz"))

# triage
triage = pd.read_csv(os.path.join(TRAIN_DIR, "triage.csv.gz"))

# ward transfers
transfers = pd.read_csv(os.path.join(HOSP_DIR, "transfers.csv.gz"))

# higher-level services (ICU, CARD, etc)
services = pd.read_csv(os.path.join(HOSP_DIR, "services.csv.gz"))

# get patient info
pts = pd.read_csv(os.path.join(HOSP_DIR, "patients.csv.gz"))

# admission demographics
admissions = pd.read_csv(os.path.join(HOSP_DIR, "admissions.csv.gz"))

# procedures (per-admission codes) and their ICD lookup table
procs = pd.read_csv(os.path.join(HOSP_DIR, "procedures_icd.csv.gz"))
procs_icd = pd.read_csv(os.path.join(HOSP_DIR, "d_icd_procedures.csv.gz"))

# diagnoses (per-admission codes) and their ICD lookup table
diags = pd.read_csv(os.path.join(HOSP_DIR, "diagnoses_icd.csv.gz"))
diags_icd = pd.read_csv(os.path.join(HOSP_DIR, "d_icd_diagnoses.csv.gz"))

# meds
med_orders = pd.read_csv(os.path.join(HOSP_DIR, "emar.csv.gz"))


In [None]:
# low_memory=False: the saved output of this cell echoes the prescriptions line,
# which looks like a pandas DtypeWarning (column types inferred chunk by chunk
# and found to be mixed) — TODO confirm. Parsing the file in a single pass gives
# every column one consistent dtype, at the cost of a higher peak memory use.
prescriptions = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/prescriptions.csv.gz', low_memory=False)
labs = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/labevents.csv.gz')
microbio = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/microbiologyevents.csv.gz')

  prescriptions = pd.read_csv('/gpfs/milgram/project/rtaylor/shared/DischargeMe/mimiciv/hosp/prescriptions.csv.gz')


### Clean up/type cast data

In [5]:
# Keep only medication events that have a chart time, a medication name and an
# event description; rows missing any of the three are discarded.
med_orders = med_orders.dropna(subset=['charttime', 'medication', 'event_txt'])

In [6]:
# Convert the timestamp columns read as plain CSV values into datetime64[ns]
# so they can be compared and formatted later on.
procs = procs.astype({"chartdate": "datetime64[ns]"})

med_orders = med_orders.astype(
    dict.fromkeys(("charttime", "scheduletime", "storetime"), "datetime64[ns]")
)

discharges = discharges.astype(
    dict.fromkeys(("charttime", "storetime"), "datetime64[ns]")
)

In [7]:
# Collapse each ICD lookup table to one row per (icd_code, icd_version).
# groupby(...).first() keeps, for every other column, the first non-null value
# seen within the group.
procs_icd, diags_icd = (
    lookup.groupby(["icd_code", "icd_version"]).first().reset_index()
    for lookup in (procs_icd, diags_icd)
)

In [8]:
# grab long_titles for procs/diags
# Each merge is guarded so that re-running this cell is harmless: merging a
# second time would make pandas suffix the overlapping columns
# (`long_title_x` / `long_title_y`) and the prompt builder, which reads
# `long_title`, would break.
# Assumes the ICD lookup tables are what supply `long_title` — TODO confirm.
if "long_title" not in procs.columns:
    procs = procs.merge(procs_icd, on=["icd_code", "icd_version"], how="left")
if "long_title" not in diags.columns:
    diags = diags.merge(diags_icd, on=["icd_code", "icd_version"], how="left")

In [27]:
# create initial input text prompt
def get_demos(subject_id):
    """Look up the demographics record of one patient in the ``pts`` table.

    The original author notes the record has gender, anchor age and, when it
    exists, date of death.

    Returns the matching rows after ``squeeze()``: pandas collapses every
    length-1 axis, so a single matching row of a multi-column table comes back
    as a Series, while zero (or several) matching rows stay a DataFrame.
    """
    is_patient = pts['subject_id'] == subject_id
    patient_rows = pts.loc[is_patient]
    return patient_rows.squeeze()
    
def get_transfers(hadm_id):
    """Return the transfer events of one admission, ordered by ``intime``.

    The result goes through ``squeeze()``, so its type depends on how many
    rows match: several transfers come back as a DataFrame, whereas a single
    transfer collapses to a Series (given a multi-column table).
    """
    admission_transfers = transfers.loc[transfers['hadm_id'] == hadm_id]
    chronological = admission_transfers.sort_values(by="intime")
    return chronological.squeeze()
    
def get_procs_within_service(hadm_id, transfer_event):
    """Return an admission's procedures charted during one transfer event.

    Parameters
    ----------
    hadm_id
        Admission identifier matched against ``procs['hadm_id']``.
    transfer_event
        Mapping/row providing ``'intime'`` and ``'outtime'``.

    Returns
    -------
    DataFrame
        Rows of ``procs`` for the admission, ordered by ``seq_num``, whose
        ``chartdate`` lies strictly between ``intime`` and ``outtime``.
    """
    ordered_procs = procs.loc[procs['hadm_id'] == hadm_id].sort_values(by="seq_num")

    # NOTE(review): both bounds are exclusive. If `chartdate` only carries a
    # date (midnight), procedures charted on the day of `intime` are left out
    # — confirm this is intended.
    charted = ordered_procs['chartdate']
    after_arrival = charted > transfer_event['intime']
    before_departure = charted < transfer_event['outtime']
    return ordered_procs[after_arrival & before_departure]
    
def get_procs(hadm_id):
    """Return every procedure row of one admission, ordered by ``seq_num``."""
    belongs_to_admission = procs['hadm_id'] == hadm_id
    return procs.loc[belongs_to_admission].sort_values(by="seq_num")

def get_diags(hadm_id):
    """Return every diagnosis row of one admission, ordered by ``seq_num``."""
    belongs_to_admission = diags['hadm_id'] == hadm_id
    return diags.loc[belongs_to_admission].sort_values(by="seq_num")

def get_med_orders_within_service(hadm_id, transfer_event):
    """Return the medications administered during one transfer event.

    Parameters
    ----------
    hadm_id
        Admission identifier matched against ``med_orders['hadm_id']``.
    transfer_event
        Mapping/row providing ``'intime'`` and ``'outtime'``.

    Returns
    -------
    DataFrame
        A new frame (independent of ``med_orders``) holding the admission's
        ``"Administered"`` events, ordered by ``emar_seq``, whose ``charttime``
        lies strictly between ``intime`` and ``outtime``. An ``admin_text``
        column of the form ``"<medication> at <formatted charttime>"`` is added.
    """
    was_administered = (med_orders['hadm_id'] == hadm_id) & (med_orders['event_txt'] == "Administered")
    med_admin = med_orders[was_administered].sort_values("emar_seq")

    in_unit = ((med_admin['charttime'] > transfer_event['intime'])
               & (med_admin['charttime'] < transfer_event['outtime']))

    # Take an explicit copy before adding a column: assigning on a filtered
    # slice raises SettingWithCopyWarning and is ambiguous about what is written.
    meds_in_unit = med_admin[in_unit].copy()
    meds_in_unit['admin_text'] = (
        meds_in_unit['medication'] + " at " + meds_in_unit['charttime'].dt.strftime('%B %d, %Y, %r')
    )

    return meds_in_unit

def get_med_orders(hadm_id):
    """Return every medication administered during one admission.

    Selects the ``"Administered"`` events of ``med_orders`` for ``hadm_id`` and
    adds an ``admin_text`` column of the form
    ``"<medication> at <formatted charttime>"``.

    The returned frame is an independent copy, so adding the column neither
    touches ``med_orders`` nor raises SettingWithCopyWarning.
    """
    was_administered = (med_orders['hadm_id'] == hadm_id) & (med_orders['event_txt'] == "Administered")
    med_admin = med_orders[was_administered].copy()
    med_admin['admin_text'] = med_admin['medication'] + " at " + med_admin['charttime'].dt.strftime('%B %d, %Y, %r')
    return med_admin

def get_triage_info(stay_id):
    """Return the rows of the ``triage`` table recorded for one ED stay."""
    is_stay = triage['stay_id'] == stay_id
    return triage.loc[is_stay]

In [28]:
def _pronouns_for(sex):
    """Return the (subject, possessive) pronouns to use for a recorded sex.

    Only "M" and "F" map to gendered pronouns; any other value, including the
    "[UNKNOWN SEX]" placeholder, falls back to singular "they"/"their".
    """
    known = {"M": ("he", "his"), "F": ("she", "her")}
    return known.get(sex, ("they", "their"))


def _join_present(values):
    """Comma-join values as text, skipping missing (NaN/None) entries."""
    return ', '.join(str(value) for value in values if pd.notna(value))


def create_pt_prompt(discharge_row):
    """Build the free-text patient summary prompt for one discharge row.

    Parameters
    ----------
    discharge_row
        Row-like object providing ``'subject_id'``, ``'hadm_id'``,
        ``'chiefcomplaint'`` and ``'careunit'``. ``'careunit'`` is expected to
        be the text of a Python list literal in which missing units appear as
        ``nan``; an already-parsed list/tuple is accepted as well.

    Returns
    -------
    str
        A prompt describing the patient's demographics, chief complaint, the
        care units visited, and the admission's diagnoses, procedures and
        administered medications.
    """
    demos = get_demos(discharge_row['subject_id'])
    if demos.empty:
        age = r"[UNKNOWN AGE]"
        sex = r"[UNKNOWN SEX]"
    else:
        age = demos['anchor_age']
        sex = demos['gender']

    # The placeholder string is truthy, so pronouns must be chosen from the
    # actual value rather than from `if sex:`.
    subject, possessive = _pronouns_for(sex)
    verb = "were" if subject == "they" else "was"

    chief_complaints = discharge_row['chiefcomplaint']

    raw_units = discharge_row['careunit']
    if isinstance(raw_units, str):
        # NOTE(review): this also rewrites "nan" inside a unit name, should one
        # ever contain it — confirm no care unit does.
        raw_units = ast.literal_eval(raw_units.replace('nan', 'None'))
    elif not isinstance(raw_units, (list, tuple)):
        raw_units = []  # missing value (e.g. NaN): no units recorded
    careunits = [unit for unit in raw_units if unit and pd.notna(unit)]

    if careunits:
        unit_path = f"started at {careunits[0]}"
        if len(careunits) > 1:
            # Only mention later units when there are some; otherwise the
            # sentence would end in "and then visited ."
            unit_path += f" and then visited {', '.join(careunits[1:])}"
    else:
        unit_path = r"started at [UNKNOWN CARE UNIT]"

    # Named differently from the module-level tables they are drawn from.
    adm_procs = get_procs(discharge_row['hadm_id'])
    adm_diags = get_diags(discharge_row['hadm_id'])
    adm_meds = get_med_orders(discharge_row['hadm_id'])

    # Titles can be missing when a code has no match in the ICD lookup (left
    # merge), so they are joined with missing entries skipped.
    prompt = (
        f"___ is a {age} year old {sex} presenting to the ED with {chief_complaints}. "
        f"Over the course of {possessive} hospital course, ___ {unit_path}. "
        f"Over the course of their hospital stay, {subject} {verb} given the following diagnoses: "
        f"{_join_present(adm_diags['long_title'])} in order of importance to this admission.\n\n"
        f"This led to receiving the following procedures: "
        f"{_join_present(adm_procs['long_title'])} in order of priority.\n\n"
        f"___ also received the following medications: {_join_present(adm_meds['admin_text'])}"
    )

    return prompt
    
