In [None]:
import os
import time
import json
import requests
import pandas as pd

class Retriever:
    """Load one patient's records from CSV tables and free-text files.

    Structured data comes from the CSV files named in ``patient_data_files``;
    unstructured data comes from ``.txt`` files in the same directory whose
    names start with a given keyword.

    Args:
        data_directory: Directory containing the CSV and text files.
        step_delay: Seconds to pause after announcing the retrieval step.
            Defaults to the original one-second pause; pass ``0`` to disable.

    Raises:
        FileNotFoundError: If ``data_directory`` does not exist.
    """

    # CSV key -> (key in the returned patient_data dict, column to extract).
    _LIST_FIELDS = {
        "conditions": ("conditions", "DESCRIPTION"),
        "medications": ("medications", "DESCRIPTION"),
        "procedures": ("procedures", "DESCRIPTION"),
        "encounters": ("encounters", "DESCRIPTION"),
        "imaging_studies": ("imaging_reports", "BODYSITE_DESCRIPTION"),
    }

    def __init__(self, data_directory, step_delay=1.0):
        self.data_directory = data_directory
        self.step_delay = step_delay
        self.patient_data_files = {
            "patients": "patients.csv",
            "conditions": "conditions.csv",
            "medications": "medications.csv",
            "observations": "observations.csv",
            "procedures": "procedures.csv",
            "encounters": "encounters.csv",
            "imaging_studies": "imaging_studies.csv",
        }

        if not os.path.exists(self.data_directory):
            raise FileNotFoundError(f"Error: The directory {self.data_directory} does not exist.")

    def fetch_patient_history(self, patient_id):
        """Collect every available record for ``patient_id``.

        CSV files that are missing, or that have no ``PATIENT`` column, are
        skipped (the latter with a warning). Column names are compared in
        upper case.

        Args:
            patient_id: Identifier matched against each table's ``PATIENT``
                column (compared as strings).

        Returns:
            A dict with list values for ``conditions``, ``medications``,
            ``procedures``, ``encounters``, ``imaging_reports``,
            ``care_plans``, ``imaging_reports_text`` and ``patient_notes``,
            and a ``{description: "value units"}`` dict for ``observations``.
        """
        print("Step 1: Retrieval")
        if self.step_delay:
            time.sleep(self.step_delay)
        print("- Searching for patient data in multiple sources...")

        patient_data = {
            "conditions": [],
            "medications": [],
            "observations": {},
            "procedures": [],
            "encounters": [],
            "imaging_reports": [],
        }

        # Load structured patient data
        for key, filename in self.patient_data_files.items():
            file_path = os.path.join(self.data_directory, filename)
            if not os.path.exists(file_path):
                continue

            df = pd.read_csv(file_path)
            df.columns = df.columns.str.upper()  # Normalize column names to uppercase
            print(f"Columns in {filename}: {df.columns.tolist()}")  # Debugging

            if "PATIENT" not in df.columns:
                print(f"Warning: 'PATIENT' column not found in {filename}")
                continue

            # Compare as strings: the ID typically arrives from input() as
            # text, while pandas may have parsed a numeric ID column.
            rows = df[df["PATIENT"].astype(str) == str(patient_id)]

            if key == "observations":
                patient_data["observations"] = self._observations(rows)
            elif key in self._LIST_FIELDS:
                target, column = self._LIST_FIELDS[key]
                patient_data[target] = self._column_values(rows, column)

        # Load unstructured text data (care plans, imaging reports, patient notes)
        patient_data["care_plans"] = self.extract_text_data("care_plan")
        patient_data["imaging_reports_text"] = self.extract_text_data("imaging_report")
        patient_data["patient_notes"] = self.extract_text_data("patient_note")

        return patient_data

    @staticmethod
    def _column_values(rows, column):
        """Return the non-missing values of ``column``, or ``[]`` if absent."""
        if column not in rows.columns:
            return []
        return rows[column].dropna().tolist()

    @staticmethod
    def _observations(rows):
        """Map each observation description to a ``"value units"`` string.

        Returns an empty dict unless DESCRIPTION, VALUE and UNITS all exist.
        Later rows overwrite earlier rows that share a description.
        """
        required = ("DESCRIPTION", "VALUE", "UNITS")
        if any(column not in rows.columns for column in required):
            return {}
        values = rows["VALUE"].astype(str)
        # A missing unit would otherwise turn the whole reading into NaN.
        units = rows["UNITS"].fillna("").astype(str)
        readings = (values + " " + units).str.strip()
        return dict(zip(rows["DESCRIPTION"], readings))

    def extract_text_data(self, keyword):
        """Read every ``<keyword>*.txt`` file in the data directory.

        Args:
            keyword: Required file-name prefix.

        Returns:
            The stripped contents of each matching file, ordered by file
            name; an empty list if the directory no longer exists.
        """
        extracted_texts = []
        if not os.path.exists(self.data_directory):
            print(f"Warning: Directory {self.data_directory} not found.")
            return extracted_texts
        # Sort so the result does not depend on directory listing order.
        for file in sorted(os.listdir(self.data_directory)):
            if file.startswith(keyword) and file.endswith(".txt"):
                file_path = os.path.join(self.data_directory, file)
                with open(file_path, "r", encoding="utf-8") as f:
                    extracted_texts.append(f.read().strip())
        return extracted_texts

class Analyzer:
    """Select the parts of a patient record that a free-text query asks about.

    Args:
        step_delay: Seconds to pause after announcing the analysis step.
            Defaults to the original one-second pause; pass ``0`` to disable.
    """

    # (label in the result, key in patient_data, default factory, keywords).
    # Keywords are matched as lowercase substrings, so the singular stem also
    # covers the plural ("medication" matches "medications").
    _RULES = (
        ("Symptoms", "conditions", list, ("symptom",)),
        ("Conditions", "conditions", list, ("condition", "diagnos")),
        ("Medications", "medications", list, ("medication",)),
        ("Lab Results", "observations", dict, ("lab result", "observation")),
        ("Procedures", "procedures", list, ("procedure",)),
        ("Encounters", "encounters", list, ("encounter",)),
        ("Care Plans", "care_plans", list, ("care plan",)),
        ("Imaging Reports", "imaging_reports_text", list, ("imaging report",)),
        ("Imaging Studies", "imaging_reports", list, ("imaging stud",)),
        ("Patient Notes", "patient_notes", list, ("patient note",)),
    )

    def __init__(self, step_delay=1.0):
        self.step_delay = step_delay

    def analyze_history(self, patient_data, query):
        """Return the record sections whose keywords appear in ``query``.

        Args:
            patient_data: Dict of patient record sections; missing sections
                are reported as an empty list or dict.
            query: Free-text question; matching is case-insensitive.

        Returns:
            A dict mapping section labels to the matching data, or a single
            ``"General Summary"`` entry when no keyword matches.
        """
        print("\nStep 2: Analysis")
        if self.step_delay:
            time.sleep(self.step_delay)

        lowered = query.lower()
        relevant_info = {}
        for label, data_key, default_factory, keywords in self._RULES:
            if any(keyword in lowered for keyword in keywords):
                relevant_info[label] = patient_data.get(data_key, default_factory())

        if not relevant_info:
            relevant_info["General Summary"] = "No specific data available for the given query."

        print("- Extracted relevant information based on query.")
        return relevant_info

class Presenter:
    """Ask the Groq chat-completions endpoint to summarise the selected data.

    Args:
        api_key: Groq API key. When omitted, the key is read from the
            ``GROQ_API_KEY`` environment variable at call time.
        step_delay: Seconds to pause after announcing the presentation step.
            Defaults to the original one-second pause; pass ``0`` to disable.
    """

    API_URL = "https://api.groq.com/openai/v1/chat/completions"
    MODEL = "deepseek-r1-distill-llama-70b"
    API_KEY_ENV_VAR = "GROQ_API_KEY"
    # Upper bound on the serialized patient data embedded in the prompt.
    MAX_CONTEXT_CHARS = 3000
    # Seconds before an unresponsive request is abandoned.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key=None, step_delay=1.0):
        self.api_key = api_key
        self.step_delay = step_delay

    def generate_summary(self, relevant_info, query):
        """Print and return a model-written answer to ``query``.

        Failures (missing key, network or HTTP error, unparsable response)
        are reported on stdout rather than raised.

        Args:
            relevant_info: JSON-serializable data selected for the query.
            query: The user's question.

        Returns:
            The summary text, or ``None`` when no request could be completed.
        """
        print("\nStep 3: Presentation")
        if self.step_delay:
            time.sleep(self.step_delay)
        print("- Generating AI-enhanced summary...")

        api_key = self.api_key or os.environ.get(self.API_KEY_ENV_VAR)
        if not api_key:
            # Fail fast instead of sending a request that is bound to be rejected.
            print(f"AI enhancement failed: no API key provided (set {self.API_KEY_ENV_VAR}).")
            return None

        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        try:
            payload = self._build_payload(relevant_info, query)
            response = requests.post(
                self.API_URL, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT
            )
            if not response.ok:
                print(f"AI enhancement failed: HTTP {response.status_code}: {response.text[:500]}")
                return None
            response_json = response.json()
        except (requests.RequestException, ValueError, TypeError) as e:
            # ValueError: body is not JSON; TypeError: relevant_info not serializable.
            print(f"AI enhancement failed: {e}")
            return None

        summary = self._extract_summary(response_json)
        print(summary)
        return summary

    def _build_payload(self, relevant_info, query):
        """Build the chat-completions request body for ``query``."""
        context = json.dumps(relevant_info)[: self.MAX_CONTEXT_CHARS]
        return {
            "model": self.MODEL,
            "messages": [
                {"role": "system", "content": "You are a medical assistant summarizing patient history."},
                {"role": "user", "content": f"Patient data: {context}. Answer the query: {query}"},
            ],
        }

    @staticmethod
    def _extract_summary(response_json):
        """Return the first choice's message content, or a fixed error string."""
        try:
            return response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return "AI response error"

class PatientHistoryAnalyzer:
    """Run the retrieve -> analyze -> present pipeline for one query.

    Args:
        data_directory: Directory passed to the ``Retriever``.
    """

    def __init__(self, data_directory):
        self.retriever = Retriever(data_directory)
        self.analyzer = Analyzer()
        self.presenter = Presenter()

    def process_query(self, patient_id, query):
        """Answer ``query`` about ``patient_id`` and print the result.

        Args:
            patient_id: Identifier handed to the retriever.
            query: Free-text question handed to the analyzer and presenter.

        Returns:
            Whatever the presenter returns, or ``None`` when the retriever
            found no data for the patient.
        """
        patient_data = self.retriever.fetch_patient_history(patient_id)
        # The retriever returns a dict that always has keys, so test whether
        # any section actually holds data rather than the dict's truthiness.
        if not patient_data or not any(patient_data.values()):
            print("No patient data available.")
            return None
        relevant_info = self.analyzer.analyze_history(patient_data, query)
        return self.presenter.generate_summary(relevant_info, query)

def main():
    """Prompt for a patient ID, then answer queries until the user exits.

    Input is stripped of surrounding whitespace so a stray space does not
    break the patient lookup or the ``exit`` check. Blank queries are
    ignored, and end-of-input or Ctrl-C ends the session quietly.
    """
    data_directory = "./data/"  # Update to correct path
    analyzer = PatientHistoryAnalyzer(data_directory)
    try:
        patient_id = input("Enter patient ID: ").strip()
        while True:
            query = input("Enter your medical query (or type 'exit' to quit): ").strip()
            if query.lower() == "exit":
                break
            if not query:
                continue
            analyzer.process_query(patient_id, query)
    except (EOFError, KeyboardInterrupt):
        # Finish the prompt line so the shell prompt starts on a fresh line.
        print()


if __name__ == "__main__":
    main()

Enter patient ID: bfb6537b-535a-4f31-9a56-073220f96a17
Enter your medical query (or type 'exit' to quit): What are the patient's chronic conditions
Step 1: Retrieval
- Searching for patient data in multiple sources...
Columns in patients.csv: ['ID', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE']
Columns in conditions.csv: ['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']
Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_

Enter your medical query (or type 'exit' to quit): Show recent lab results for the patient
Step 1: Retrieval
- Searching for patient data in multiple sources...
Columns in patients.csv: ['ID', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE']
Columns in conditions.csv: ['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']
Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in enco

Enter your medical query (or type 'exit' to quit): What procedures has the patient undergone
Step 1: Retrieval
- Searching for patient data in multiple sources...
Columns in patients.csv: ['ID', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE']
Columns in conditions.csv: ['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']
Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in en

Enter your medical query (or type 'exit' to quit): List all past medical encounters
Step 1: Retrieval
- Searching for patient data in multiple sources...
Columns in patients.csv: ['ID', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE']
Columns in conditions.csv: ['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']
Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in encounters.

Enter your medical query (or type 'exit' to quit): Show the patient's immunization history
Step 1: Retrieval
- Searching for patient data in multiple sources...
Columns in patients.csv: ['ID', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE']
Columns in conditions.csv: ['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']
Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in enco

Columns in medications.csv: ['START', 'STOP', 'PATIENT', 'PAYER', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'PAYER_COVERAGE', 'DISPENSES', 'TOTALCOST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in observations.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
Columns in procedures.csv: ['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'BASE_COST', 'REASONCODE', 'REASONDESCRIPTION']
Columns in encounters.csv: ['ID', 'START', 'STOP', 'PATIENT', 'ORGANIZATION', 'PROVIDER', 'PAYER', 'ENCOUNTERCLASS', 'CODE', 'DESCRIPTION', 'BASE_ENCOUNTER_COST', 'TOTAL_CLAIM_COST', 'PAYER_COVERAGE', 'REASONCODE', 'REASONDESCRIPTION']
Columns in imaging_studies.csv: ['ID', 'DATE', 'PATIENT', 'ENCOUNTER', 'BODYSITE_CODE', 'BODYSITE_DESCRIPTION', 'MODALITY_CODE', 'MODALITY_DESCRIPTION', 'SOP_CODE', 'SOP_DESCRIPTION']

Step 2: Analysis
- Extracted relevant information based on query.

Step 3: Presentation
- Generating AI-enhanced summary...
AI Response: {'id':

Columns in encounters.csv: ['ID', 'START', 'STOP', 'PATIENT', 'ORGANIZATION', 'PROVIDER', 'PAYER', 'ENCOUNTERCLASS', 'CODE', 'DESCRIPTION', 'BASE_ENCOUNTER_COST', 'TOTAL_CLAIM_COST', 'PAYER_COVERAGE', 'REASONCODE', 'REASONDESCRIPTION']
Columns in imaging_studies.csv: ['ID', 'DATE', 'PATIENT', 'ENCOUNTER', 'BODYSITE_CODE', 'BODYSITE_DESCRIPTION', 'MODALITY_CODE', 'MODALITY_DESCRIPTION', 'SOP_CODE', 'SOP_DESCRIPTION']

Step 2: Analysis
- Extracted relevant information based on query.

Step 3: Presentation
- Generating AI-enhanced summary...
AI Response: {'id': 'chatcmpl-036aecff-5908-4294-9b17-35d706b640c6', 'object': 'chat.completion', 'created': 1739045854, 'model': 'deepseek-r1-distill-llama-70b', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '<think>\nOkay, so I need to figure out if the patient has any allergies based on the provided data. The patient data given is a JSON object with a "General Summary" that says, "No specific data available for the given quer