In [None]:
from langchain_openai import ChatOpenAI
import httpx

# SECURITY: credentials must never be committed inside a notebook. The API key is
# read from the GENAILAB_API_KEY environment variable; when it is not set, the
# user is prompted for it (the input is not echoed and is not stored in the file).
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and rotated.
import getpass
import os

GENAILAB_BASE_URL = "https://genailab.tcs.in"
GENAILAB_API_KEY = os.environ.get("GENAILAB_API_KEY") or getpass.getpass("GenAI Lab API key: ")

# Create an HTTP client that skips SSL verification (only for hackathon/test environments)
# WARNING: verify=False disables TLS certificate validation for every request made
# through this client; do not reuse this setting outside a trusted test network.
client = httpx.Client(verify=False)

# Chat model used both for chunking the documents and for answering queries.
llm = ChatOpenAI(
    base_url=GENAILAB_BASE_URL,
    model="azure/genailab-maas-gpt-4o",
    api_key=GENAILAB_API_KEY,
    http_client=client,
)
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Embedding model used to index and query the vector store; shares the HTTP client above.
embedding_model = OpenAIEmbeddings(
    base_url=GENAILAB_BASE_URL,
    model="azure/genailab-maas-text-embedding-3-large",
    api_key=GENAILAB_API_KEY,
    http_client=client,
)

import requests
import functools

# Force verify=False on the module-level `requests` helpers (requests.get, requests.post, ...).
# WARNING: this disables TLS certificate validation for every call made through
# these helpers; it is only acceptable in a trusted hackathon/test network.
for method in ("get", "post", "put", "delete", "head", "options", "patch"):
    original = getattr(requests, method)

    # Re-running this cell must not wrap an already-patched function again,
    # otherwise every re-run adds one more layer of indirection.
    if getattr(original, "_verify_disabled", False):
        continue

    # `_original=original` binds the current function at definition time; without
    # it every wrapper would close over the loop variable and call the last method.
    @functools.wraps(original)
    def insecure_request(*args, _original=original, **kwargs):
        """Call the wrapped `requests` helper with certificate verification turned off."""
        kwargs["verify"] = False
        return _original(*args, **kwargs)

    insecure_request._verify_disabled = True
    setattr(requests, method, insecure_request)

In [None]:
import os
import json
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough

# -------------------------------
# 1. Loader: read all .txt files
# -------------------------------
def load_txts(txt_folder=r"C:\Users\GenAIBLRANCUSR33\Desktop\Team7\database\txt_files"):
    """Read every ``.txt`` file in a folder into LangChain ``Document`` objects.

    Args:
        txt_folder: Directory to scan (non-recursive). Defaults to the original
            hard-coded location so existing ``load_txts()`` calls keep working;
            pass a different path to run the notebook on another machine.

    Returns:
        A list with one ``Document`` per file, in file-name order. The file's
        text is stored in ``page_content`` and its base name in
        ``metadata["source"]``.

    Raises:
        OSError: If ``txt_folder`` cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    all_docs = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # document (and therefore chunk) order stable between runs.
    for file_name in sorted(os.listdir(txt_folder)):
        if not file_name.lower().endswith(".txt"):
            continue
        txt_path = os.path.join(txt_folder, file_name)
        # A directory whose name happens to end in ".txt" cannot be opened as a file.
        if not os.path.isfile(txt_path):
            continue
        with open(txt_path, "r", encoding="utf-8") as f:
            content = f.read()
        all_docs.append(
            Document(
                page_content=content,
                metadata={"source": file_name},
            )
        )

    return all_docs

def load_jsons(file_path=r"C:\Users\GenAIBLRANCUSR33\Desktop\Team7\database\json_files\mock_ehr.json"):
    """Load the mock EHR JSON file and return its parsed content.

    Args:
        file_path: Path of the JSON file to read. Defaults to the original
            hard-coded location so existing ``load_jsons()`` calls keep working.

    Returns:
        The deserialized JSON document (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        ehr_Report = json.load(f)
    return ehr_Report

def get_patient_by_id(patient_id, ehr_data=None):
    """Return the patient entry whose ``"id"`` equals ``patient_id``.

    Args:
        patient_id: Identifier to look for in each patient's ``"id"`` field.
        ehr_data: Optional already-loaded EHR mapping with a ``"patients"`` list.
            When omitted, the data is read from disk via ``load_jsons()``, which
            is the original behaviour. Passing it avoids re-reading the file for
            every lookup.

    Returns:
        The first matching patient dict, or ``None`` when no patient matches
        (including when the ``"patients"`` key is missing or null).
    """
    if ehr_data is None:
        ehr_data = load_jsons()
    # `or []` also covers a JSON file that stores "patients": null.
    for patient in ehr_data.get("patients") or []:
        if patient.get("id") == patient_id:
            return patient
    return None


# ID of the patient whose stored EHR entry is attached to every triage prompt.
# Kept as a named constant so it can be changed in one place.
PATIENT_ID = "P001"

# None when the EHR file has no patient with this ID; getRetriever() then falls
# back to the text "No past record found.".
patient_record_json = get_patient_by_id(PATIENT_ID)

def json_to_text(d, prefix="- "):
    """Flatten a (possibly nested) mapping into a list of bullet-style text lines.

    Keys are humanised (underscores become spaces, then title-cased). Nested
    dicts and lists made up solely of dicts are rendered as a header line
    followed by their own lines indented two extra spaces per nesting level.
    Any other list is rendered on a single line, comma-separated. Everything
    else is rendered as ``"<prefix><Key>: <value>"``.

    Args:
        d: Mapping with string keys to render.
        prefix: Marker placed in front of every key.

    Returns:
        The rendered lines, in the mapping's iteration order.
    """
    def _nested(mapping):
        # Render a sub-mapping and push it one level (two spaces) deeper.
        return ["  " + text for text in json_to_text(mapping, prefix=prefix)]

    rendered = []
    for raw_key, val in d.items():
        label = prefix + raw_key.replace("_", " ").title()

        if isinstance(val, dict):
            rendered.append(label + ":")
            rendered += _nested(val)
            continue

        if not isinstance(val, list):
            rendered.append(f"{label}: {val}")
            continue

        # Note: an empty list also satisfies all(...) and yields only the header line.
        if all(isinstance(entry, dict) for entry in val):
            rendered.append(label + ":")
            for entry in val:
                rendered += _nested(entry)
        else:
            rendered.append(label + ": " + ", ".join(str(entry) for entry in val))

    return rendered


In [None]:
def getRetriever(persist_directory="chroma_db", k=3):
    """Build the triage RAG chain over the LLM-chunked text documents.

    Despite its name, this returns a runnable chain (retrieve -> prompt -> LLM),
    not a bare retriever.

    Steps:
      1. Load every ``.txt`` document via ``load_txts()``.
      2. Ask the module-level ``llm`` to split each document into
         ``### Chunk N:`` sections.
      3. Embed the chunks with the module-level ``embedding_model`` into a
         Chroma store persisted under ``persist_directory``.
      4. Compose a chain that answers a question from the retrieved chunks plus
         the module-level ``patient_record_json``.

    Note: every call re-runs the LLM chunking and re-embeds all documents, so it
    is slow and incurs API cost; call it once and reuse the returned chain.

    Args:
        persist_directory: Directory in which Chroma persists the collection.
        k: Number of chunks retrieved per question.

    Returns:
        A runnable whose ``invoke(question)`` takes the question string and
        returns the chat model's response message (the text is in ``.content``).
    """
    # The chunking prompt does not depend on the document, so build it once
    # instead of once per file.
    chunking_prompt = ChatPromptTemplate.from_messages([
        ("system",
            "You are a helpful assistant that splits medical or disease-related documents "
            "into semantically meaningful chunks for downstream semantic search and retrieval."
        ),
        ("human",
            """You will be given a document. Split it into semantically coherent sections, following these rules:

                1. Do not omit any information.
                2. Keep related items together.
                3. Each chunk should be self-contained.
                4. Aim for ~5000-6000 words per chunk.
                5. Return chunks as a numbered list with headings like '### Chunk 1:', '### Chunk 2:'.
                6. Include metadata (e.g., source) at the beginning of each chunk.

                Document:
                {document}
                """
        )
    ])

    chunked_docs = []
    for doc in load_txts():
        response = llm.invoke(chunking_prompt.format_messages(document=doc.page_content))

        # NOTE(review): any text the model emits before the first "### Chunk"
        # heading is kept as its own chunk; a reply without headings becomes a
        # single chunk.
        chunks = [c.strip() for c in response.content.split("### Chunk") if c.strip()]
        for idx, chunk in enumerate(chunks, start=1):
            chunked_docs.append(
                Document(
                    page_content=chunk,
                    metadata={"source": f"{doc.metadata['source']}_chunk{idx}"}
                )
            )

    # Deterministic ids ("<file>_chunk<N>") make a re-run overwrite the entries
    # already persisted in `persist_directory` instead of appending duplicates,
    # which would otherwise crowd the top-k results with identical chunks.
    vector_store = Chroma.from_documents(
        chunked_docs,
        embedding_model,
        ids=[chunk_doc.metadata["source"] for chunk_doc in chunked_docs],
        persist_directory=persist_directory,
    )

    if patient_record_json:
        patient_record = "Patient Past Record:\n" + "\n".join(json_to_text(patient_record_json))
    else:
        patient_record = "No past record found."
    retriever = vector_store.as_retriever(search_kwargs={"k": k})

    def format_docs(docs):
        # Give the model the chunk text itself; interpolating the retriever's
        # raw output would insert the repr of a list of Document objects.
        return "\n\n".join(d.page_content for d in docs)

    qa_prompt = ChatPromptTemplate.from_messages([
        ("system",
            "You are a helpful medical assistant. Answer questions using only the provided context. "
            "If the answer is not in the context, say you don't know rather than making up information."
        ),
        ("human",
            """You are given the patient's past medical records and the retrieved context from relevant documents.

            Context (retrieved from vector database):
            {context}

            Patient Past Record:
            {patient_record}

            Current patient query / symptom:
            {question}

            Guidelines:
            1. Use ONLY the information from the retrieved context and patient past record.
            2. Synthesize both sources to provide a clear, concise response.
            3. Do NOT invent facts not present in the context or past record.
            4. Provide your response in the following structured format:

                - **Triage Level**: [Low / Medium / High / Emergency]
                - **Reasoning**: [Explain why this triage level is assigned]
                - **Urgent Evaluation Needed**: [Yes/No; specify tests if any]
                - **Patient Actions**: [What patient should do next]
                - **Clinician Tasks**: [Tasks clinician should perform]
                - **Disclaimer**: [Include standard medical disclaimer]

            5. If the context and past record do not contain the answer, respond:
                "The provided documents do not contain information about this."
            """
        )
    ])

    # The dict fans the incoming question out to three prompt variables: the
    # retrieved context, the fixed patient record, and the question itself.
    rag_chain = (
        {
            "context": retriever | format_docs,
            "patient_record": lambda _: patient_record,
            "question": RunnablePassthrough(),
        }
        | qa_prompt
        | llm
    )
    return rag_chain


In [None]:
# Example end-to-end run: build the RAG chain, then ask it a single question.
query = "Give related the cancer"
# getRetriever() invokes the LLM once per document to chunk it and builds the
# vector store, so this call is the expensive step of the notebook.
rag_chain = getRetriever()
# The question string is the chain's only input.
response = rag_chain.invoke(query)
# The answer text is exposed on the response's `.content` attribute.
print(response.content)