In [2]:
# Natural-language evaluation questions, phrased from the patient's point of view.
# The pipelines below iterate this list in order and append one answer per question,
# so the position of each question determines the position of its prediction.
dataset_questions = [
    "What was my hemoglobin level in the most recent report?",
    "What medications am I currently prescribed, and what are their dosages?",
    "Which hospital did I last visit for my medical visit?",
    "What was the result of my last cholesterol test?",
    "How many times have I been diagnosed with hypertension?",
    "What are the recommendations for managing my diabetes?",
    "What is the contact number of the hospital where I had my last appointment?",
    "What was the total payment for my last medical appointment?",
    "What diagnosis was given by Dr. Jane Doe during my last visit?",
    "What are the normal reference ranges for a glucose test?",
    "How often should I take my prescribed Lisinopril?",
    "What was the interpretation of my last creatinine test result?",
    "What are the tests for which I have a high interpretation?",
    "Which is recommended for my allergy diagnosis?",
    "What is the dosage of the medicine Oseltamivir?",
    "Can you provide the address of the Kindred Hospital I visited in Chicago?",
    "What was my platelet count in the most recent blood report?",
    "Who diagnosed me with flu last year?",
    "What are the dates of the appointments I have with Dr. Michael Smith?",
    "How much did I pay for my medical appointments?",
    "What is the dosage and frequency for the medicine Metformin that I am taking?",
    "Can you list all the doctors I have seen at the 4th hospital in Miami?",
    "What recommendations were given for my last diagnosis of gastritis?",
    "How many times have I been prescribed Cetirizine?",
    "What was the result of my urinalysis report from two visits ago?",
    "Can you provide a summary of my medical reports from the last six months?",
    "What are the test units for the hemoglobin A1c in my reports?",
    "How many different types of reports have I had?",
    "What was the highest payment I made for a single appointment and when?",
    "What is the latest recommendation for my hypertension management?",
    # NOTE(review): the next question looks truncated ("... according?") - confirm the intended wording.
    "What are the common recommendations for colds according?",
    "What was the result of my Hemoglobin A1c in the last report?",
    "What was the interpretation of my Hemoglobin A1c in the last report?",
    "What was the interpretation of my bilirubin levels in the last report?",
    "What are all the dates of all the appointments I've had with cardiologists?",
    "How many times have I visited Cedars-Sinai for appointments?",
    "What are the normal reference ranges for neutrophils?",
    "What is the email of doctor Michael Smith?",
    "What is the email of doctor Jane Doe?",
    "What is the phone number of doctor John Doe?",
    "What is the phone number of doctor Maria Garcia?",
    "What hospital does Jane Doe work at?",
    "What kind of doctor is Michael Smith?",
    "What specialization is Maria Garcia?",
    "What is the phone number of NYU Langone?",
    "What is the address of Cedars-Sinai Medical Center?",
    "How many hospitals do I have listed?",
    "What units is the Cholesterol test in?"
]

#### DB QUERY PIPELINE

In [4]:


from langchain_community.utilities.sql_database import SQLDatabase
import os
from helper import generate_query, generate_response, SUBCHAIN_PROMPT, FULLCHAIN_PROMPT
from LLMs import OCTOAI_LLM_Chatbot, langdock_LLM_Chatbot
from pathlib import Path

# Langdock credentials are read from the environment so no secret lives in the notebook.
LANGDOCK_API_KEY = os.getenv("langdock_api_key")
LANGDOCK_BASE_URL = os.getenv("langdock_base_url")

# MODEL1 = "meta-llama-3-8b-instruct"
MODEL2 = "gpt-4o"

# llm_octo = OCTOAI_LLM_Chatbot(model_name=MODEL1, api_key=OCTOAI_API_KEY)

# Langdock chat client shared by the pipelines in this notebook.
llm_langdock = langdock_LLM_Chatbot(model_name=MODEL2, api_key=LANGDOCK_API_KEY, base_url=LANGDOCK_BASE_URL)

# Locate the SQLite database without hard-coding a machine-specific absolute path.
# By default the repo-relative copy is used; set MED_ASSIST_DB_PATH to point at a
# different copy (for example the one under backend/llama_index).
PMA_workspace = Path.cwd().parent.parent
default_db_path = PMA_workspace / "desktop_app" / "ui" / "DB_query" / "med_assist.db"
db_path = Path(os.getenv("MED_ASSIST_DB_PATH") or default_db_path).expanduser().resolve()

# SQLite would otherwise create an empty database file on first connection, which
# shows up later as confusing "no such table" failures instead of a clear error here.
if not db_path.is_file():
    raise FileNotFoundError(
        f"SQLite database not found at {db_path}; set MED_ASSIST_DB_PATH to its location."
    )

# "sqlite:///" + absolute path is the SQLAlchemy form; an absolute POSIX path already
# starts with "/", which yields the expected four slashes in total.
db_uri = f"sqlite:///{db_path.as_posix()}"

# Single connection used by the DB query pipeline.
DB = SQLDatabase.from_uri(db_uri)

print(DB.get_usable_table_names())


https://api.langdock.com/openai/eu/v1
[REDACTED - credential removed from saved cell output]
['USER', 'appointments', 'diagnosis', 'diagnosis_by_doctor', 'doctor_specialization', 'doctors', 'hospitals', 'medicine', 'medicine_by_diagnosis', 'report_type', 'reports']


In [5]:

# Run every evaluation question through the two-step DB pipeline:
#   1. ask the LLM for a database query, 2. ask the LLM to phrase the final answer.
# `predictions` gets exactly one entry per question, in the order of `dataset_questions`.
predictions = []

# question -> repr of the exception that caused its "Error" placeholder, kept so
# failures can be diagnosed instead of being silently discarded.
prediction_errors = {}

# NOTE(review): a later cell defines another `generate_query` with a different
# signature; if that cell has already been run in this kernel, the call below will
# resolve to it - confirm execution order when many questions fail.
for q in dataset_questions:
    try:
        query = generate_query(llm=llm_langdock, template=SUBCHAIN_PROMPT, question=q, db=DB)

        # print(f"Predicted DB Query: {query}\n\n")

        response = generate_response(llm=llm_langdock, query=query, template=FULLCHAIN_PROMPT, question=q, db=DB)
    except Exception as e:
        # Keep the placeholder so the list stays index-aligned with the questions,
        # but surface the cause rather than swallowing it.
        prediction_errors[q] = repr(e)
        print(f"[error] {q!r}: {e!r}")
        predictions.append("Error")
    else:
        predictions.append(response)


In [6]:
# Display the collected answers; entries are in the same order as dataset_questions.
predictions

['Your hemoglobin level in the most recent report was 19.0.',
 'Error',
 'Kindred Hospital Chicago North',
 'The result of your last cholesterol test was 180.0.',
 'You have been diagnosed with hypertension 3 times.',
 'Monitor blood sugar levels and follow a diabetic diet.',
 'The contact number of the hospital where you had your last appointment is 8375621.',
 'The total payment for your last medical appointment was $70.00.',
 'Error',
 'Error',
 'Error',
 'Error',
 'Hemoglobin, Cholesterol, Glucose',
 'Avoid allergens and take antihistamines.',
 'The dosage of the medicine Oseltamivir is 75.',
 'The address of Kindred Hospital you visited in Chicago is 2544 W Montrose Ave, Chicago, IL 60618.',
 'Your platelet count in the most recent blood report was [test_result].',
 'Error',
 'The dates of the appointments you have with Dr. Michael Smith are:\n\n- 2024-05-15\n- 2024-05-20\n- 2024-06-15',
 'You paid a total of $405.00 for your medical appointments.',
 'The dosage for Metformin is 5

### RAG QUERY PIPELINE

In [8]:
# Prompt template for the RAG pipeline. It has two str.format placeholders:
# {question} (the user's question) and {user_data} (the user's medical data),
# both filled in by generate_query below before the prompt is sent to the LLM.
context = """
You are a helpful medical assistant. 

Given the user question and the user medical data, answer the user's question to the best of your ability. The data is an sql database in JSON format. The data is the users personal medical data, so the user may use personal pronouns like 'my' when querying. It is okay to say you do not have the information the user is looking for.

Answer the users question to the best of your ability, ensuring to ONLY respond with information you find in the provided dataset. 

User question: {question}
User medical data: {user_data}

"""

def generate_query(llm, template, user_data, question):
    """
    Answers a user's question from their medical data with a single LLM call.

    Despite its name, this function does not build an SQL query: it fills the
    prompt template with the question and the user's data and returns whatever
    the LLM replies.

    NOTE: this definition shadows the `generate_query` imported from `helper`
    earlier in the notebook and takes different arguments; cells that rely on
    the helper version must be run before this one.

    Parameters:
    llm (Class): Class instance of large language model; must provide
        `chat_completion(prompt, question)`
    template (str): Prompt with `{question}` and `{user_data}` placeholders
    user_data (str | dict | list): User's medical data. Strings are inserted
        unchanged; any other value is serialized to JSON so that the prompt
        contains real JSON rather than a Python repr.
    question (str): User's question about their data

    Returns:
    out (str): LLM response
    """

    if isinstance(user_data, str):
        serialized_data = user_data
    else:
        # default=str keeps values such as dates from aborting the serialization
        serialized_data = json.dumps(user_data, ensure_ascii=False, default=str)

    # fill the template with the question and the serialized data
    prompt = template.format(question=question, user_data=serialized_data)
    # single LLM call; the reply is returned to the caller unchanged
    answer = llm.chat_completion(prompt, question)

    return answer


import json

# Load the user's medical data from the JSON file in the working directory.
# JSON text is UTF-8, so the encoding is set explicitly instead of relying on the
# platform default (which differs between operating systems).
with open('med_assist.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Full evaluation over every question, currently disabled; only the single
# question below is sent to the LLM.
test_predictions = []
# for q in dataset_questions:
#     question = q
#     pred = generate_query(llm=llm_langdock, template=context, user_data=data, question=question)
#     test_predictions.append(pred)
#     print(pred)

# Spot-check the RAG pipeline with one question.
q = "What are the normal reference ranges for a glucose test?"
print(generate_query(llm=llm_langdock, template=context, user_data=data, question=q))
    

The normal reference range for a glucose test, according to your medical data, is 70-100 mg/dL.
