In [3]:
# -----------------------------------------------------------
# 1. Load Visit Data from i2b2-Mimicking CSV
# -----------------------------------------------------------
# This cell loads the consolidated CSV file which mimics the i2b2 visit_dimension table.
# Each row contains patient ID, visit dates, a location code (used below to derive the asthma flag), and a hex-encoded (0x-prefixed) clinical note.

import pandas as pd
import binascii

csv_path = "datafiles/i2b2_visit_dimension.csv"

# Read the visit table and attach the boolean asthma flag in a single chain:
# a visit is flagged when it took place at the asthma clinic location.
df = pd.read_csv(csv_path).assign(
    has_asthma=lambda visits: visits["location_cd"] == "ASTHMA_CLINIC"
)

# Preview the first few visit records
df.head()

Unnamed: 0,encounter_num,patient_num,start_date,end_date,inout_cd,location_cd,location_path,visit_blob,file_name,has_asthma
0,475303,1000000001,01/16/2003,01/16/2003,O,ASTHMA_CLINIC,\Hospital\Clinic\Pulmonary\Asthma\\,0x2A2A566973697420496E666F726D6174696F6E3A2A2A...,PID:1000000001-EID:475303-CCD:PNote-SDT:200301...,True
1,479681,1000000001,03/29/2007,03/29/2007,O,ASTHMA_CLINIC,\Hospital\Clinic\Pulmonary\Asthma\\,0x4F6E204D617263682032392C20323030372C20612032...,PID:1000000001-EID:479681-CCD:PNote-SDT:200703...,True
2,480315,1000000001,09/20/2007,09/20/2007,O,ASTHMA_CLINIC,\Hospital\Clinic\Pulmonary\Asthma\\,0x2A2A566973697420496E666F726D6174696F6E3A2A2A...,PID:1000000001-EID:480315-CCD:PNote-SDT:200709...,True
3,480903,1000000001,03/04/2008,03/04/2008,O,ASTHMA_CLINIC,\Hospital\Clinic\Pulmonary\Asthma\\,0x2A2A566973697420496E666F726D6174696F6E3A2A2A...,PID:1000000001-EID:480903-CCD:PNote-SDT:200803...,True
4,481398,1000000001,08/11/2008,08/11/2008,O,ASTHMA_CLINIC,\Hospital\Clinic\Pulmonary\Asthma\\,0x2A2A566973697420496E666F726D6174696F6E3A2A2A...,PID:1000000001-EID:481398-CCD:PNote-SDT:200808...,True


In [5]:
# -----------------------------------------------------------
# 2. Interactive Patient Note Preview
# -----------------------------------------------------------
# Selecting a patient in the dropdown previews the first 15 lines of the decoded note from that patient's first visit row.

from IPython.display import display, Markdown
import ipywidgets as widgets
import binascii

def _decode_visit_blob(blob):
    """Return the text of a hex-encoded note, or None if it cannot be decoded.

    Args:
        blob: Value taken from the ``visit_blob`` column. Expected to be a
            hexadecimal string, optionally prefixed with ``0x``/``0X``.

    Returns:
        The decoded text (undecodable UTF-8 bytes are dropped), or None when
        ``blob`` is not a string (e.g. the NaN pandas yields for an empty CSV
        cell) or is not valid hexadecimal.
    """
    if not isinstance(blob, str):
        return None

    hex_data = blob.strip()
    # Remove the prefix only where it belongs: at the start of the string.
    if hex_data[:2].lower() == "0x":
        hex_data = hex_data[2:]

    try:
        raw = binascii.unhexlify(hex_data)
    except ValueError:  # binascii.Error (odd length / non-hex) is a ValueError
        return None
    return raw.decode("utf-8", errors="ignore")


def show_patient_note(patient_id):
    """Display a header and a 15-line preview of a patient's first visit note.

    Looks up the first row of ``df`` whose ``patient_num`` equals
    ``patient_id``, decodes its ``visit_blob`` and renders a Markdown header
    followed by a text area holding the first 15 lines of the note.

    Args:
        patient_id: Value to match against the ``patient_num`` column.

    Returns:
        None. Output is rendered through ``display``. If no row matches, a
        short notice is shown instead of raising; if the note is missing or
        malformed, the text area shows a placeholder message.
    """
    visits = df[df["patient_num"] == patient_id]
    if visits.empty:
        display(Markdown(f"### Patient {patient_id} - no visits found"))
        return
    row = visits.iloc[0]

    note_text = _decode_visit_blob(row["visit_blob"])
    if note_text is None:
        preview = "(No readable clinical note for this visit.)"
    else:
        preview = "\n".join(note_text.splitlines()[:15])

    display(Markdown(f"### Patient {row['patient_num']} ({'Has Asthma' if row['has_asthma'] else 'No Asthma'}) - {row['start_date']}"))
    display(widgets.Textarea(value=preview, layout=widgets.Layout(width="100%", height="300px")))

# Dropdown with interactive behavior.
# `interact` returns the function it wraps; the trailing semicolon stops the
# notebook from echoing that return value underneath the widget.
widgets.interact(
    show_patient_note,
    patient_id=widgets.Dropdown(
        options=sorted(df["patient_num"].unique()),
        description="Patient #:",
    ),
);


interactive(children=(Dropdown(description='Patient #:', options=(np.int64(1000000001), np.int64(1000000002), …

<function __main__.show_patient_note(patient_id)>

In [6]:
# -----------------------------------------------------------
# 3. Define Prompt for Clinical Note Analysis
# -----------------------------------------------------------
# This cell creates a structured prompt using ChatPromptTemplate to instruct the model
# to extract useful clinical information from the patient note.

from langchain_core.prompts import ChatPromptTemplate

# (role, text) pairs for the chat prompt. The human message is written one
# line per list entry and joined with newlines; empty entries are blank lines.
messages_notes = [
    (
        "system",
        "You are an advanced medical documentation assistant with expertise in clinical text analysis. "
        "Your task is to review a given clinical note and extract relevant medical details accurately.",
    ),
    (
        "human",
        "\n".join(
            [
                "Please analyze the following clinical note: {patient_note}.",
                "",
                "Extract and list the following information:",
                "1. Patient demographics",
                "2. Chief Complaints",
                "3. Current Medications",
                "4. Determine whether the patient has asthma (Yes/No), based on explicit mentions or related diagnoses.",
                "",
                "Provide the output in a structured format.",
            ]
        ),
    ),
]

# Template with a single input variable, `patient_note`.
prompt_template_notes = ChatPromptTemplate.from_messages(messages_notes)