In [11]:
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_cohere import CohereEmbeddings
from langchain_groq import ChatGroq
import time
from IPython.display import clear_output
from dotenv import load_dotenv
import os
from datetime import datetime

def get_datetime_str():
    """Return the current time from ``datetime.now()`` as a ``YYYYMMDD_HHMMSS`` string."""
    return datetime.now().strftime("%Y%m%d_%H%M%S")

load_dotenv()

# Tunable settings for the indexing pipeline, gathered in one place so a reader
# can change the document, chunking or models without hunting through the cell.
PDF_PATH = "../sample_documents/Kidney-Stones-Patient-Guide.pdf"
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 400
EMBEDDING_MODEL = "embed-english-light-v3.0"
LLM_MODEL = "mistral-saba-24b"

# Step 1: Load PDF documents
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Join every loaded document's page_content with newlines into a single string,
# so the splitter below works on the whole text rather than on each document separately.
full_text = "\n".join(doc.page_content for doc in documents)

# Step 2: Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = text_splitter.create_documents([full_text])

# Step 3: Embed and store in vector DB (FAISS)
embeddings = CohereEmbeddings(model=EMBEDDING_MODEL)
vectorstore = FAISS.from_documents(docs, embeddings)

# Step 4: Create conversational memory
# output_key="answer" names which chain output the memory stores; the chain below is
# configured with return_source_documents=True.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")

# Step 5: Load LLM (Groq chat model, temperature 0)
llm = ChatGroq(model=LLM_MODEL, temperature=0)


from langchain_core.callbacks import BaseCallbackHandler


# Step 6: Create Conversational Retrieval Chain
# Build the retriever first so the chain construction reads as plain wiring of
# the LLM, the retriever and the conversation memory.
doc_retriever = vectorstore.as_retriever()
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=doc_retriever,
    memory=memory,
    return_source_documents=True,
)

# Step 7: Simulate a chat loop
if __name__ == "__main__":

    class PromptCallback(BaseCallbackHandler):
        """Save the first prompt of every LLM call to ``prompt_<n>.txt`` in one directory.

        The call counter lives on the instance, so each query gets numbering that
        starts at 0 simply by creating a new callback, with no module-level global.
        """

        def __init__(self, output_dir):
            super().__init__()
            self.output_dir = output_dir
            self.call_count = 0

        def on_llm_start(self, serialized, prompts, **kwargs):
            # The context manager closes the file as soon as the prompt is written.
            with open(f"{self.output_dir}/prompt_{self.call_count}.txt", "a+") as prompt_file:
                prompt_file.write(prompts[0])
            self.call_count += 1

    print("RAG Chatbot (type 'exit' to quit)")
    # Local (query, answer) log; the chain itself is driven by `memory`, not this list.
    chat_history = []

    # One timestamp per session: every query of this run is stored under the same folder.
    timestamp = get_datetime_str()

    while True:
        query = input("\nUser: ")

        if query.lower() in ["exit", "quit"]:
            break

        # NOTE(review): the raw query text is used as the directory name, so a query
        # containing a path separator creates nested directories. Kept as-is because
        # the replay cell addresses directories by the literal query text.
        prompts_dir = f"prompts/{timestamp}/{query}"
        os.makedirs(prompts_dir, exist_ok=True)

        # Fresh callback per query so prompt numbering restarts at prompt_0.txt.
        callback = PromptCallback(prompts_dir)

        result = qa_chain.invoke(query, config={"callbacks": [callback]})

        for msg in result["chat_history"]:
            msg.pretty_print()

        time.sleep(1)  # Give the output time to show up
        clear_output(wait=True)

        # Append to history
        chat_history.append((query, result["answer"]))

        with open(f"{prompts_dir}/answer.txt", "a+") as answer_file:
            answer_file.write(result['answer'])


What type of stone is formed due to high volume of uric acid in urine?

Uric acid stones are formed due to high levels of uric acid in the urine.

How can we prevent forming them?

To prevent forming uric acid stones, you can take the following steps:

1. **Medications**: Your healthcare provider may prescribe medications such as:
   - **Allopurinol**: This medication lowers the level of uric acid in the blood and urine, helping to prevent uric acid stones.
   - **Potassium citrate**: This medication makes the urine less acidic or more alkaline, which helps prevent uric acid stones.

2. **Dietary Changes**:
   - **Limit Animal Protein**: Reduce your intake of meat, fish, seafood, poultry, pork, lamb, mutton, and game meat. These foods can increase uric acid levels.
   - **Increase Fruits and Vegetables**: A diet rich in fruits and vegetables can help make the urine less acidic.
   - **Stay Hydrated**: Drink plenty of fluids, especially water, to increase urine volume and dilute the ur

### Re-generating answers by passing internally generated prompts directly to the LLM

In [15]:
import re
import os

def extract_messages_from_rephrase_prompt(rephrase_prompt_path):
    """Rebuild a chat-message list from a saved rephrase prompt file.

    The file text is split on ``"Human: "`` and everything before the first
    marker is discarded. The remaining segments are mapped as follows:

    * first segment: one ``user`` message with the segment verbatim;
    * middle segments: split once on ``"Assistant: "`` into a ``user`` and an
      ``assistant`` message;
    * last segment: split once on ``"Assistant: "``; the assistant part's final
      two lines become a trailing ``user`` message (the follow-up), and the
      lines before them form the ``assistant`` message.

    Args:
        rephrase_prompt_path: Path of the saved prompt text file.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in conversation order.

    Raises:
        IndexError: If a non-first segment contains no ``"Assistant: "`` marker.
    """
    # Context manager so the file handle is released immediately after reading.
    with open(rephrase_prompt_path) as prompt_file:
        segments = prompt_file.read().split("Human: ")[1:]

    messages = []
    last_index = len(segments) - 1
    for i, segment in enumerate(segments):
        if i == 0:
            messages.append({"role": "user", "content": segment})
            continue

        # maxsplit=1: only the first marker separates the user turn from the reply,
        # so a reply that itself contains "Assistant: " is kept whole, not truncated.
        user_assistant = segment.split("Assistant: ", 1)
        user_text = user_assistant[0]
        reply_text = user_assistant[1]

        if i == last_index:
            reply_lines = reply_text.split("\n")
            assistant = "\n".join(reply_lines[:-2])
            followup = "\n".join(reply_lines[-2:])
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": assistant})
            messages.append({"role": "user", "content": followup})
        else:
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": reply_text})
    return messages

def extract_messages_from_final_prompt(final_prompt_path):
    """Split a saved final prompt file into a system message and a user message.

    The system content is the text between the first ``"System:"`` and the next
    ``"Human:"``; the user content is everything after the first ``"Human:"``.
    Both are stripped of surrounding whitespace. Because the first ``"Human:"``
    ends the system section, a system text that itself contains ``"Human:"``
    is cut at that point.

    Args:
        final_prompt_path: Path of the saved prompt text file.

    Returns:
        ``[{"role": "system", ...}, {"role": "user", ...}]``; a ``content`` value
        is ``None`` when its marker is not found in the file.
    """
    # Context manager so the file handle is released immediately after reading.
    with open(final_prompt_path) as prompt_file:
        text = prompt_file.read()

    system_match = re.search(r"System:(.*?)Human:", text, re.DOTALL)
    human_match = re.search(r"Human:(.*?)$", text, re.DOTALL)
    system_content = system_match.group(1).strip() if system_match else None
    human_content = human_match.group(1).strip() if human_match else None

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": human_content},
    ]

def _read_text(path):
    """Return the full contents of the text file at ``path``, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


def _check_final_answer(prompts_dir, final_prompt_name):
    """Re-send one saved final prompt to ``llm`` and compare with the saved answer."""
    messages = extract_messages_from_final_prompt(f"{prompts_dir}/{final_prompt_name}")
    implicit_ans = _read_text(f"{prompts_dir}/answer.txt")  # ANSWER BY PASSING PROMPT IMPLICITLY
    explicit_ans = llm.invoke(input=messages).content  # ANSWER BY PASSING PROMPT EXPLICITLY

    if implicit_ans == explicit_ans:
        print("Both answers, from langchain and by passing final prompt to llm are the same!")


def test_prompts(prompts_dir):
    """Replay the prompts saved for one query and report where the results match.

    A directory with exactly two entries is treated as a single LLM call:
    ``prompt_0.txt`` is the final prompt and ``answer.txt`` the recorded answer.
    Any other entry count is treated as two calls: ``prompt_0.txt`` is the
    rephrase prompt and ``prompt_1.txt`` the final prompt. In that case the
    rephrased question is regenerated and compared with the text after the last
    ``"Human: "`` in ``prompt_1.txt`` before the final answer is checked.

    A message is printed for each comparison that matches exactly; nothing is
    printed for a mismatch.

    Args:
        prompts_dir: Directory holding the saved prompt files and ``answer.txt``.
    """
    len_prompts = len(os.listdir(prompts_dir))
    if len_prompts == 2:
        _check_final_answer(prompts_dir, "prompt_0.txt")
    else:
        messages = extract_messages_from_rephrase_prompt(f"{prompts_dir}/prompt_0.txt")
        response = llm.invoke(input=messages)
        # REPHRASED QUESTION BY PASSING PROMPT IMPLICITLY
        implicit_rephrased_question = _read_text(f"{prompts_dir}/prompt_1.txt").split("Human: ")[-1]
        # REPHRASED QUESTION BY PASSING PROMPT EXPLICITLY
        explicit_rephrased_question = response.content

        if implicit_rephrased_question == explicit_rephrased_question:
            print("Both rephrased questions, from langchain and by passing rephrase prompt to llm are the same!")

        _check_final_answer(prompts_dir, "prompt_1.txt")

In [16]:
# Replay the prompts saved under one prompts/<timestamp>/<query> directory and
# print where the direct-LLM results equal what the chain recorded.
prompts_dir = "prompts/20250413_083110/How can we prevent forming them?"
test_prompts(prompts_dir)

Both rephrased questions, from langchain and by passing rephrase prompt to llm are the same!
Both answers, from langchain and by passing final prompt to llm are the same!


In [4]:
# import re

# def extract_prompts(text):
#     system_match = re.search(r'System:(.*?)Human:', text, re.DOTALL)
#     human_match = re.search(r'Human:(.*?)$', text, re.DOTALL)
    
#     return {
#         "system": system_match.group(1).strip() if system_match else None,
#         "human": human_match.group(1).strip() if human_match else None
#     }

# def get_messages(prompt_dir):
#     file_cnt = len(os.listdir(prompt_dir))
#     if file_cnt == 3:
#         lst = open(f"{prompt_dir}/prompt_0.txt").read().split("Human: ")[1:]

#         rephrase_question = []

#         for i, item in enumerate(lst):
#             if i == 0:
#                 rephrase_question.append({"role": "user", "content": item})
#             elif i == len(lst)-1:
#                 user_assistant = item.split("Assistant: ")
#                 assistant_followup = user_assistant[1].split("\n")
#                 assistant = "\n".join(assistant_followup[:-2])
#                 followup = "\n".join(assistant_followup[-2:])
#                 rephrase_question.append({"role": "user", "content": user_assistant[0]})
#                 rephrase_question.append({"role": "assistant", "content": assistant})
#                 rephrase_question.append({"role": "user", "content": followup})
#             else:
#                 user_assistant = item.split("Assistant: ")
#                 rephrase_question.append({"role": "user", "content": user_assistant[0]})
#                 rephrase_question.append({"role": "assistant", "content": user_assistant[1]})
        
#         answer_question = []

#         prompt1 = open(f"{prompt_dir}/prompt_1.txt").read()
#         result = extract_prompts(prompt1)
#         answer_question.append({"role": "system", "content": result["system"]})
#         answer_question.append({"role": "user", "content": result["human"]})

#         return rephrase_question, answer_question

#     else:
#         answer_question = []

#         prompt0 = open(f"{prompt_dir}/prompt_0.txt").read()
#         result = extract_prompts(prompt0)
#         answer_question.append({"role": "system", "content": result["system"]})
#         answer_question.append({"role": "user", "content": result["human"]})

#         return answer_question

# prompt_dir = "/home/vikas/Projects/Document-QA-System/tests/prompts/20250412_205246/How can we prevent forming them?"
# rephrase_question, answer_question = get_messages(prompt_dir)

# rephrased_question = llm.invoke(rephrase_question).content

# print(rephrased_question)

# answer = llm.invoke(answer_question).content

# print(answer)