## Setup

In [146]:
# -------------------------
# Imports and Setup
# -------------------------
!pip install transformers accelerate datasets groq



In [147]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
import torch
from groq import Groq

In [148]:
import os
# Set the API key as an environment variable
os.environ['GROQ_API_KEY'] = "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"
# os.environ['GROQ_API_KEY'] = "INSET_YOUR_KEY_HERE"

## Load Data

Load Synthea dataset


In [149]:
import requests
import zipfile
import io
import os

In [150]:
def load_and_save_csv_from_zip(url: str, output_dir: str = './'):
    # Download the ZIP file
    response = requests.get(url)
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        for file_info in z.infolist():
            if file_info.filename.endswith(".csv"):
                print(f"Extracting {file_info.filename}")
                z.extract(file_info, path=output_dir)

In [151]:
# Synthea Dataset
url = "https://synthetichealth.github.io/synthea-sample-data/downloads/latest/synthea_sample_data_csv_latest.zip"
load_and_save_csv_from_zip(url)

Extracting supplies.csv
Extracting allergies.csv
Extracting procedures.csv
Extracting claims_transactions.csv
Extracting medications.csv
Extracting payers.csv
Extracting claims.csv
Extracting observations.csv
Extracting encounters.csv
Extracting patients.csv
Extracting immunizations.csv
Extracting conditions.csv
Extracting providers.csv
Extracting imaging_studies.csv
Extracting devices.csv
Extracting careplans.csv
Extracting organizations.csv
Extracting payer_transitions.csv


In [152]:
# -------------------------
# Load and Process Synthea Data
# -------------------------
def load_patient_history(patient_id, base_path="/content"):
    conditions = pd.read_csv(f"{base_path}/conditions.csv")
    encounters = pd.read_csv(f"{base_path}/encounters.csv")
    history = []

    patient_conditions = conditions[conditions['PATIENT'] == patient_id]
    for _, row in patient_conditions.iterrows():
        history.append(f"Condition: {row['DESCRIPTION']} (Date: {row['START']})")

    patient_encounters = encounters[encounters['PATIENT'] == patient_id]
    for _, row in patient_encounters.iterrows():
        history.append(f"Encounter: {row['DESCRIPTION']} (Date: {row['START']})")

    return "\n".join(history)

In [153]:
# Load patient data
patients = pd.read_csv("/content/patients.csv")
sample_patient_id = patients.iloc[0]['Id']
history_text = load_patient_history(sample_patient_id)

## Load model

In [154]:
def load_model(model_name="axiong/PMC_LLaMA_13B"):
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

    # Ensure pad_token is set properly
    if tokenizer.pad_token is None:
        if hasattr(tokenizer, 'eos_token') and tokenizer.eos_token is not None:
            tokenizer.pad_token = tokenizer.eos_token
        else:
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            model.resize_token_embeddings(len(tokenizer))

    return tokenizer, model

In [155]:
# Load model - call the load_model function
model_tokenizer, model = load_model()

tokenizer_config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

pytorch_model-00001-of-00006.bin:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

pytorch_model-00005-of-00006.bin:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

pytorch_model-00002-of-00006.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00003-of-00006.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00006-of-00006.bin:   0%|          | 0.00/2.49G [00:00<?, ?B/s]

pytorch_model-00004-of-00006.bin:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.1k [00:00<?, ?B/s]



Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]



## Build Prompts

In [156]:
def build_simplify_prompt(history_text):
    return f"""
    Simplify and Summarize the following medical note so that a patient without a medical background can understand it:
    ---
    {history_text}
    ---
    """

def build_simplify_fewshot_prompt(history_text):
    return f"""
    Simplify and Summarize the following medical note so that a patient without a medical background can understand it:

    Note: “Diagnosed with hypertension and prescribed Lisinopril”
    Simplified: “You have high blood pressure. Your doctor gave you medicine.”

    Now simplify:
    ---
    {history_text}
    ---
    """

def build_prediction_prompt_cot(history_text):
    return f"""
    Simplify and Summarize the following medical note so that a patient without a medical background can understand it

    Patient History:
    - Type 2 Diabetes
    - Foot pain
    - Neuropathy

    Reason through history and predict the next encounter:

    {history_text}

"""

def build_prediction_prompt_fewshot(history_text):
    return f"""
    Simplify and Summarize the following medical note so that a patient without a medical background can understand it:
    ---
    Example 1:
    History:
    - Hypertension
    - High cholesterol
    - Chest pain
    Prediction: The patient may have a cardiology consultation next.

    Example 2:
    History:
    - Asthma
    - Shortness of breath
    Prediction: The next likely encounter is a pulmonary function test.

    Now analyze the following patient:
    History:
    {history_text}
    Prediction:
    """

In [157]:
# Build prompts
simplify_prompt = build_simplify_prompt(history_text)
simplify_fewshot_prompt = build_simplify_fewshot_prompt(history_text)
predict_cot_prompt = build_prediction_prompt_cot(history_text)
predict_fewshot_prompt = build_prediction_prompt_fewshot(history_text)

## Generate Text

In [168]:
# PMC LLama 13B
def generate_text(prompt, model, tokenizer):
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    result = generator(prompt, do_sample=True, temperature=0.7, truncation=True, padding=True)[0]['generated_text']
    return result

In [169]:
# Groq - llama-3.3-70b-versatile
def generate_text_groq(prompt):
    # Initialize the Groq client
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

    # Request chat completion from the model
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama-3.3-70b-versatile",
    )

    # Return the generated text
    return chat_completion.choices[0].message.content

In [170]:
# Generate outputs
simplified_output = generate_text(simplify_prompt, model, model_tokenizer)
simplified_fewshot_output = generate_text(simplify_fewshot_prompt, model, model_tokenizer)
prediction_cot_output = generate_text(predict_cot_prompt, model, model_tokenizer)
prediction_fewshot_output = generate_text(predict_fewshot_prompt, model, model_tokenizer)

Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


In [171]:
# Generate outputs - groq
simplified_output_groq = generate_text_groq(simplify_prompt)
simplified_fewshot_output_groq = generate_text_groq(simplify_fewshot_prompt)
prediction_cot_output_groq = generate_text_groq(predict_cot_prompt)
prediction_fewshot_output_groq = generate_text_groq(predict_fewshot_prompt)

## Evaluate Results

In [172]:
print(history_text)

Condition: Housing unsatisfactory (finding) (Date: 2012-04-01)
Condition: Received higher education (finding) (Date: 2012-04-01)
Condition: Transport problem (finding) (Date: 2012-04-01)
Condition: Social isolation (finding) (Date: 2012-04-01)
Condition: Stress (finding) (Date: 2012-04-01)
Condition: Medication review due (situation) (Date: 2013-04-07)
Condition: Full-time employment (finding) (Date: 2013-04-07)
Condition: Lack of access to transportation (finding) (Date: 2013-04-07)
Condition: Fracture of bone (disorder) (Date: 2015-09-28)
Condition: Closed fracture of hip (disorder) (Date: 2015-09-28)
Condition: Limited social contact (finding) (Date: 2016-04-10)
Condition: Victim of intimate partner abuse (finding) (Date: 2016-04-10)
Condition: Gingivitis (disorder) (Date: 2016-04-10)
Condition: Medication review due (situation) (Date: 2019-04-14)
Condition: Stress (finding) (Date: 2019-04-14)
Condition: Sprain (morphologic abnormality) (Date: 2020-10-30)
Condition: Sprain of ankle 

Simplify Patient Note

In [173]:
print(f'PMC_LLAMA:\n {simplified_output}')

PMC_LLAMA:
 
    Simplify and Summarize the following medical note so that a patient without a medical background can understand it:
    ---
    Condition: Housing unsatisfactory (finding) (Date: 2012-04-01)
Condition: Received higher education (finding) (Date: 2012-04-01)
Condition: Transport problem (finding) (Date: 2012-04-01)
Condition: Social isolation (finding) (Date: 2012-04-01)
Condition: Stress (finding) (Date: 2012-04-01)
Condition: Medication review due (situation) (Date: 2013-04-07)
Condition: Full-time employment (finding) (Date: 2013-04-07)
Condition: Lack of access to transportation (finding) (Date: 2013-04-07)
Condition: Fracture of bone (disorder) (Date: 2015-09-28)
Condition: Closed fracture of hip (disorder) (Date: 2015-09-28)
Condition: Limited social contact (finding) (Date: 2016-04-10)
Condition: Victim of intimate partner abuse (finding) (Date: 2016-04-10)
Condition: Gingivitis (disorder) (Date: 2016-04-10)
Condition: Medication review due (situation) (Date: 2019

In [174]:
print(f'GROQ:\n {simplified_output_groq}')

GROQ:
 **Summary of Your Medical History:**

This is a summary of your medical history from 2012 to 2022. It includes information about your health conditions, medical visits, and personal situations.

**Health Conditions:**

* You have experienced stress, social isolation, and limited social contact at various times.
* You have had several medical conditions, including:
	+ A bone fracture (2015)
	+ A hip fracture (2015)
	+ Gingivitis (gum inflammation) (2016 and 2022)
	+ A sprained ankle (2020)
	+ Acute viral pharyngitis (a viral throat infection) (2021)
	+ Prediabetes (a condition that can lead to diabetes) (2022)
* You have also experienced intimate partner abuse (2016 and 2022).

**Medical Visits:**

* You have had regular general examinations with your doctor (2012, 2013, 2016, 2019, and 2022).
* You have visited the emergency room for injuries (2015 and 2020).
* You have had check-ups with your doctor (2015, 2016, and 2022).

**Personal Situations:**

* You have experienced trans

Simply - Few Shot

In [175]:
print(f'PMC_LLAMA:\n {simplified_fewshot_output.strip()}')

PMC_LLAMA:
 Simplify and Summarize the following medical note so that a patient without a medical background can understand it:
    
    Note: “Diagnosed with hypertension and prescribed Lisinopril”
    Simplified: “You have high blood pressure. Your doctor gave you medicine.”

    Now simplify:
    ---
    Condition: Housing unsatisfactory (finding) (Date: 2012-04-01)
Condition: Received higher education (finding) (Date: 2012-04-01)
Condition: Transport problem (finding) (Date: 2012-04-01)
Condition: Social isolation (finding) (Date: 2012-04-01)
Condition: Stress (finding) (Date: 2012-04-01)
Condition: Medication review due (situation) (Date: 2013-04-07)
Condition: Full-time employment (finding) (Date: 2013-04-07)
Condition: Lack of access to transportation (finding) (Date: 2013-04-07)
Condition: Fracture of bone (disorder) (Date: 2015-09-28)
Condition: Closed fracture of hip (disorder) (Date: 2015-09-28)
Condition: Limited social contact (finding) (Date: 2016-04-10)
Condition: Victim

In [176]:
print(f'GROQ:\n {simplified_fewshot_output_groq}')

GROQ:
 Here's a simplified and summarized version of your medical note that's easier to understand:

**Your Health History:**

* You've had some problems with housing and transportation in the past.
* You've experienced stress, social isolation, and limited social contact.
* You've been a victim of intimate partner abuse.
* You've had some health issues, including:
	+ A broken hip (2015)
	+ Gingivitis (gum disease) - multiple times
	+ A sprained ankle (2020)
	+ A viral throat infection (2021)
	+ Prediabetes (means you're at risk for developing diabetes) - diagnosed in 2022
* You've had regular check-ups and general examinations with your doctor.
* You've been to the emergency room a few times for different issues.

**Important Notes:**

* Your doctor has been keeping an eye on your medications and has reviewed them with you several times.
* You've been working full-time, which is a positive aspect of your life.

Remember, this is a summary of your medical note, and it's always best to 

Next Encounter Prediction - Chain of Thought

In [177]:
print(f'PMC_LLAMA:\n {str(prediction_cot_output).strip()}')

PMC_LLAMA:
 Simplify and Summarize the following medical note so that a patient without a medical background can understand it

    Patient History:
    - Type 2 Diabetes
    - Foot pain
    - Neuropathy

    Reason through history and predict the next encounter:

    Condition: Housing unsatisfactory (finding) (Date: 2012-04-01)
Condition: Received higher education (finding) (Date: 2012-04-01)
Condition: Transport problem (finding) (Date: 2012-04-01)
Condition: Social isolation (finding) (Date: 2012-04-01)
Condition: Stress (finding) (Date: 2012-04-01)
Condition: Medication review due (situation) (Date: 2013-04-07)
Condition: Full-time employment (finding) (Date: 2013-04-07)
Condition: Lack of access to transportation (finding) (Date: 2013-04-07)
Condition: Fracture of bone (disorder) (Date: 2015-09-28)
Condition: Closed fracture of hip (disorder) (Date: 2015-09-28)
Condition: Limited social contact (finding) (Date: 2016-04-10)
Condition: Victim of intimate partner abuse (finding) (Da

In [178]:
print(f'GROQ:\n {prediction_cot_output_groq}')

GROQ:
 **Summary of Your Medical History:**

You have been living with Type 2 Diabetes, which affects how your body processes sugar. You also experience foot pain and neuropathy, a condition that can cause numbness or tingling in your feet.

Over the years, you've faced some challenges that can impact your health, including:

* Housing issues
* Transportation problems
* Social isolation (feeling lonely or disconnected from others)
* Stress
* Intimate partner abuse (you've experienced abuse in a relationship)
* Limited access to dental care (you've had gingivitis, a gum infection)

You've also had some injuries and illnesses, including:

* A hip fracture (a broken bone in your hip)
* A sprained ankle
* A viral throat infection
* Foot pain and neuropathy

**Your Medical Visits:**

You've had regular check-ups with your doctor, as well as some emergency room visits for injuries or illnesses. Your doctor has also reviewed your medications with you several times to make sure you're taking t

Next Encounter Prediction - Few Shot Chain of Thought

In [179]:
print(f'PMC_LLAMA:\n {prediction_fewshot_output.strip()}')

PMC_LLAMA:
 Simplify and Summarize the following medical note so that a patient without a medical background can understand it:
    ---
    Example 1:
    History:
    - Hypertension
    - High cholesterol
    - Chest pain
    Prediction: The patient may have a cardiology consultation next.

    Example 2:
    History:
    - Asthma
    - Shortness of breath
    Prediction: The next likely encounter is a pulmonary function test.

    Now analyze the following patient:
    History:
    Condition: Housing unsatisfactory (finding) (Date: 2012-04-01)
Condition: Received higher education (finding) (Date: 2012-04-01)
Condition: Transport problem (finding) (Date: 2012-04-01)
Condition: Social isolation (finding) (Date: 2012-04-01)
Condition: Stress (finding) (Date: 2012-04-01)
Condition: Medication review due (situation) (Date: 2013-04-07)
Condition: Full-time employment (finding) (Date: 2013-04-07)
Condition: Lack of access to transportation (finding) (Date: 2013-04-07)
Condition: Fracture of

In [180]:
print(f'GROQ:\n {prediction_fewshot_output_groq}')

GROQ:
 **Summary of Your Medical History:**

You have had several health issues and challenges over the years. These include:

1. **Social and Environmental Issues:** You have experienced difficulties with housing, transportation, and social isolation. You have also been a victim of intimate partner abuse.
2. **Physical Health Issues:** You have had a fracture of the hip, sprains (including an ankle sprain), acute viral pharyngitis (a throat infection), and gingivitis (gum disease). You also have prediabetes, which means your blood sugar levels are higher than normal but not high enough to be considered diabetes.
3. **Mental Health:** You have experienced stress.
4. **Regular Check-Ups:** You have had regular general examinations and check-ups with your healthcare provider.

**Next Steps:**

Based on your history, the next likely steps may include:

1. **Follow-up Appointments:** Regular check-ups to monitor your prediabetes, gum disease, and other health issues.
2. **Medication Review

In [181]:
# Save results
with open("simplified_instruction.txt", "w") as f:
    f.write(simplified_output)

with open("simplified_fewshot.txt", "w") as f:
    f.write(simplified_fewshot_output)

with open("prediction_cot.txt", "w") as f:
    f.write(prediction_cot_output)

with open("prediction_fewshot.txt", "w") as f:
    f.write(prediction_fewshot_output)

In [182]:
# Save results - Groq
with open("simplified_instruction_groq.txt", "w") as f:
    f.write(simplified_output_groq)

with open("simplified_fewshot.txt_groq", "w") as f:
    f.write(simplified_fewshot_output_groq)

with open("prediction_cot_groq.txt", "w") as f:
    f.write(prediction_cot_output_groq)

with open("prediction_fewshot_groq.txt", "w") as f:
    f.write(prediction_fewshot_output_groq)