In [3]:
# data ingestion
from langchain_community.document_loaders import TextLoader
loader=TextLoader("speech.txt")
text_documents=loader.load()
text_documents

[Document(metadata={'source': 'speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.\n\nJust because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.\n\nâ€¦\n\nIt will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness 

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()


print("LangSmith tracing enabled if environment variables are set correctly.")
print(f"Tracing enabled: {os.environ.get('LANGCHAIN_TRACING_V2')}")
print(f"LangSmith Project: {os.environ.get('LANGCHAIN_PROJECT')}")

LangSmith tracing enabled if environment variables are set correctly.
Tracing enabled: true
LangSmith Project: ravogate


In [5]:
# pdf reader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader("Legal_Query_RAG_IEEE.pdf")
docs=loader.load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=200)
chunks=text_splitter.split_documents(docs)
documents=text_splitter.split_documents(docs)
documents[:2]

[Document(metadata={'producer': 'pdfTeX-1.40.24; modified using iText® Core 7.2.4 (AGPL version) ©2000-2022 iText Group NV', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-02-27T19:55:58+05:30', 'moddate': '2025-02-28T17:25:14-05:00', 'ieee article id': '10887211', 'trapped': 'False', 'ieee issue id': '10820123', 'subject': 'IEEE Access;2025;13; ;10.1109/ACCESS.2025.3542125', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022) kpathsea version 6.3.4', 'ieee publication id': '6287639', 'title': 'Legal Query RAG', 'source': 'Legal_Query_RAG_IEEE.pdf', 'total_pages': 17, 'page': 0, 'page_label': '36978'}, page_content='Received 14 January 2025, accepted 31 January 2025, date of publication 14 February 2025, date of current version 3 March 2025.\nDigital Object Identifier 10.1 109/ACCESS.2025.3542125\nLegal Query RAG\nRAHMAN S. M. WAHIDUR\n1, SUMIN KIM\n 2, HAEUNG CHOI\n 1, DAVID S. BHATTI\n1,\nAND HEUNG-NO LEE\n 1, (Senior Member, IEEE)\n1School of

In [None]:
# vectorization embedding and vector store
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
CHROMA_PERSIST_DIRECTORY = "./my_pdf_embeddings"
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
documents_to_embed =chunks # Or full list of chunks for the entire PDF
# This will now work AND store the embeddings to disk:

if os.path.exists(CHROMA_PERSIST_DIRECTORY) and os.listdir(CHROMA_PERSIST_DIRECTORY):
    print(f"Loading existing vector store from: {CHROMA_PERSIST_DIRECTORY}")
    # Load the existing vector store from disk
    db = Chroma(
        persist_directory=CHROMA_PERSIST_DIRECTORY,
        embedding_function=embeddings # providing embedding_function when loading
    )
else:
    print(f"Creating new vector store and saving to: {CHROMA_PERSIST_DIRECTORY}")
    # create  new Chroma vector store and persist it to disk
    db = Chroma.from_documents(
        documents=documents_to_embed, #  actual chunks from the PDF
        embedding=embeddings,
        persist_directory=CHROMA_PERSIST_DIRECTORY # THIS IS WHAT MAKES IT PERSISTENT
    )
    print("New vector store created and persisted.")

print(f"Chroma DB created/loaded with {db._collection.count()} documents and persisted to {CHROMA_PERSIST_DIRECTORY}")

  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


Loading existing vector store from: ./my_pdf_embeddings
Chroma DB created/loaded with 227 documents and persisted to ./my_pdf_embeddings


In [None]:
import  numpy as np
print(np.array(embeddings.embed_query(chunks[0].page_content)))
print(np.array(embeddings.embed_query(chunks[0].page_content)).shape)
# every piece of text embeded ( document chunks, and later the query)
# is transformed into a point in a 384-dimensional space.That single dimension has a length of 384.


[-6.46311790e-02  7.81181753e-02 -8.63594562e-02 -2.75764782e-02
  9.09177214e-03 -1.11269541e-01 -4.38521057e-02  3.23253199e-02
  3.37249041e-02  1.83501393e-02 -1.85264554e-02  3.86686288e-02
  6.75061718e-02 -7.84838721e-02 -1.49322627e-02  1.37332343e-02
 -2.38795597e-02 -5.21884523e-02  4.54042405e-02 -2.42471397e-02
  1.10308386e-01 -7.80130699e-02 -1.90291442e-02 -1.33687258e-01
 -5.27529791e-03  1.42009649e-03  6.53517321e-02  1.06955506e-02
 -1.62832662e-02 -8.73600021e-02 -4.68272604e-02  2.68528913e-03
 -4.64460775e-02  4.86822315e-02  2.22515804e-03 -5.97296879e-02
 -1.79335158e-02 -3.63547318e-02 -1.72293447e-02 -4.45414856e-02
  7.10950838e-03 -8.84814560e-02  3.35440002e-02  1.25445817e-02
 -2.65792403e-02  3.52119245e-02 -1.96016785e-02  1.81717351e-02
 -6.49815872e-02 -7.06792921e-02 -2.49345917e-02  5.28296269e-02
 -3.74432392e-02  7.76179880e-02 -7.80405104e-03  2.16678046e-02
  7.48581365e-02  3.55439186e-02 -1.02373995e-02  4.36887331e-02
  3.93901058e-02  2.76916

In [9]:
# vector database
query="""Recently, legal practice has seen a significant rise in the adoption of Artificial Intelligence
(AI) for various core tasks."""
result=db.similarity_search(query)
result[0].page_content


'Information and Communication Technology (ICT)] (IITP-2025-RS-2021-II211835).\nABSTRACT Recently, legal practice has seen a significant rise in the adoption of Artificial Intelligence\n(AI) for various core tasks. However, these technologies remain in their early stages and face challenges\nsuch as understanding complex legal reasoning, managing biased data, ensuring transparency, and avoiding\nmisleading responses, commonly referred to as hallucinations. To address these limitations, this paper'

In [None]:
# data ingestion
from langchain_community.document_loaders import TextLoader
loader=TextLoader("speech.txt")
text_documents=loader.load()
text_documents

[Document(metadata={'source': 'speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.\n\nJust because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.\n\nâ€¦\n\nIt will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness 

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()


print("LangSmith tracing enabled if environment variables are set correctly.")
print(f"Tracing enabled: {os.environ.get('LANGCHAIN_TRACING_V2')}")
print(f"LangSmith Project: {os.environ.get('LANGCHAIN_PROJECT')}")

LangSmith tracing enabled if environment variables are set correctly.
Tracing enabled: true
LangSmith Project: ravogate


In [None]:
# pdf reader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader("Legal_Query_RAG_IEEE.pdf")
docs=loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=200)
chunks=text_splitter.split_documents(docs)
documents=text_splitter.split_documents(docs)
documents[:2]

[Document(metadata={'producer': 'pdfTeX-1.40.24; modified using iText® Core 7.2.4 (AGPL version) ©2000-2022 iText Group NV', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-02-27T19:55:58+05:30', 'moddate': '2025-02-28T17:25:14-05:00', 'ieee article id': '10887211', 'trapped': 'False', 'ieee issue id': '10820123', 'subject': 'IEEE Access;2025;13; ;10.1109/ACCESS.2025.3542125', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022) kpathsea version 6.3.4', 'ieee publication id': '6287639', 'title': 'Legal Query RAG', 'source': 'Legal_Query_RAG_IEEE.pdf', 'total_pages': 17, 'page': 0, 'page_label': '36978'}, page_content='Received 14 January 2025, accepted 31 January 2025, date of publication 14 February 2025, date of current version 3 March 2025.\nDigital Object Identifier 10.1 109/ACCESS.2025.3542125\nLegal Query RAG\nRAHMAN S. M. WAHIDUR\n1, SUMIN KIM\n 2, HAEUNG CHOI\n 1, DAVID S. BHATTI\n1,\nAND HEUNG-NO LEE\n 1, (Senior Member, IEEE)\n1School of

In [None]:
# vectorization embedding and vector store
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
CHROMA_PERSIST_DIRECTORY = "./my_pdf_embeddings"
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
documents_to_embed =chunks # Or full list of chunks for the entire PDF
# This will now work AND store the embeddings to disk:

if os.path.exists(CHROMA_PERSIST_DIRECTORY) and os.listdir(CHROMA_PERSIST_DIRECTORY):
    print(f"Loading existing vector store from: {CHROMA_PERSIST_DIRECTORY}")
    # Load the existing vector store from disk
    db = Chroma(
        persist_directory=CHROMA_PERSIST_DIRECTORY,
        embedding_function=embeddings # providing embedding_function when loading
    )
else:
    print(f"Creating new vector store and saving to: {CHROMA_PERSIST_DIRECTORY}")
    # Create a new Chroma vector store and persist it to disk
    db = Chroma.from_documents(
        documents=documents_to_embed, #  actual chunks from the PDF
        embedding=embeddings,
        persist_directory=CHROMA_PERSIST_DIRECTORY # THIS IS WHAT MAKES IT PERSISTENT
    )
    print("New vector store created and persisted.")

print(f"Chroma DB created/loaded with {db._collection.count()} documents and persisted to {CHROMA_PERSIST_DIRECTORY}")

Loading existing vector store from: ./my_pdf_embeddings
Chroma DB created/loaded with 227 documents and persisted to ./my_pdf_embeddings


In [None]:
import  numpy as np
print(np.array(embeddings.embed_query(chunks[0].page_content)))
print(np.array(embeddings.embed_query(chunks[0].page_content)).shape)


[-6.46311790e-02  7.81181753e-02 -8.63594562e-02 -2.75764782e-02
  9.09177214e-03 -1.11269541e-01 -4.38521057e-02  3.23253199e-02
  3.37249041e-02  1.83501393e-02 -1.85264554e-02  3.86686288e-02
  6.75061718e-02 -7.84838721e-02 -1.49322627e-02  1.37332343e-02
 -2.38795597e-02 -5.21884523e-02  4.54042405e-02 -2.42471397e-02
  1.10308386e-01 -7.80130699e-02 -1.90291442e-02 -1.33687258e-01
 -5.27529791e-03  1.42009649e-03  6.53517321e-02  1.06955506e-02
 -1.62832662e-02 -8.73600021e-02 -4.68272604e-02  2.68528913e-03
 -4.64460775e-02  4.86822315e-02  2.22515804e-03 -5.97296879e-02
 -1.79335158e-02 -3.63547318e-02 -1.72293447e-02 -4.45414856e-02
  7.10950838e-03 -8.84814560e-02  3.35440002e-02  1.25445817e-02
 -2.65792403e-02  3.52119245e-02 -1.96016785e-02  1.81717351e-02
 -6.49815872e-02 -7.06792921e-02 -2.49345917e-02  5.28296269e-02
 -3.74432392e-02  7.76179880e-02 -7.80405104e-03  2.16678046e-02
  7.48581365e-02  3.55439186e-02 -1.02373995e-02  4.36887331e-02
  3.93901058e-02  2.76916

In [None]:
# vector database
query="""Recently, legal practice has seen a significant rise in the adoption of Artificial Intelligence
(AI) for various core tasks."""
result=db.similarity_search(query)
result[0].page_content


'Information and Communication Technology (ICT)] (IITP-2025-RS-2021-II211835).\nABSTRACT Recently, legal practice has seen a significant rise in the adoption of Artificial Intelligence\n(AI) for various core tasks. However, these technologies remain in their early stages and face challenges\nsuch as understanding complex legal reasoning, managing biased data, ensuring transparency, and avoiding\nmisleading responses, commonly referred to as hallucinations. To address these limitations, this paper'

In [None]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate #chat based prompts
from langchain_core.output_parsers import StrOutputParser #raw to usable format out
from langchain.chains.combine_documents import create_stuff_documents_chain 
#multiple chunks into single usable input 
from langchain.chains import create_retrieval_chain #to orchestrate entire rag pipeline

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import HumanMessage, AIMessage
llm=OllamaLLM(model="tinyllama")
print("Ollama LLM Tinyllama initialized")


Ollama LLM Tinyllama initialized


In [None]:
# --- RAG Prompt for Answer Generation ---
# This prompt is specifically for how the LLM should answer *after* relevant context is retrieved.
rag_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use only the following provided context to answer the question. If the answer is not in the context, clearly state 'I don't know' or 'Answer not in the provided document'.\n\nContext:\n{context}"),
    ("user", "{question}") # <-- CHANGED: From '{input}' to '{question}'
])
print("RAG PROMPT TEMPLATE DEFINED.")

RAG PROMPT TEMPLATE DEFINED.


In [None]:
# retriever from vector store and return top k matching results
# This object is what the retrieval chain will use to perform similarity searches
retriever = db.as_retriever() 
#method to transform vector store to retreiver
print("Retriever created from Chroma DB.")

Retriever created from Chroma DB.


In [None]:
print("\n--- AI Assistant (Conversational PDF Search): Type 'exit' or 'quit' to end the conversation. ---") # CHANGED: Initial message
print("Hello! How can I help you with your legal document today?") # CHANGED: Initial message

while True: # NEW: The main conversational loop
    user_input = input("\nYou: ") # NEW: Get user input

    if user_input.lower() in ["exit", "quit"]: # NEW: Exit condition
        print("AI Assistant: Goodbye!") # NEW
        break # NEW

    try: # NEW: Error handling for the loop
        # CHANGED: Invoke the new conversational_rag_chain
        # It now expects 'question' as input, and manages chat_history internally
        response = conversational_rag_chain.invoke({"question": user_input})

        ai_response_content = response['answer'] # CHANGED: Get answer from 'answer' key
        print(f"\nAI Assistant: {ai_response_content}")

    except Exception as e: # NEW: Error handling
        print(f"\nAI Assistant: An error occurred: {e}")
        print("AI Assistant: Please try rephrasing your question or type 'exit' to end.")


--- AI Assistant (Conversational PDF Search): Type 'exit' or 'quit' to end the conversation. ---
Hello! How can I help you with your legal document today?


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI assistant. Use only the following provided context to answer the question. If the answer is not in the context, clearly state 'I don't know' or 'Answer not in the provided document'.

Context:
adaptation, and improved adherence to instructions.
4) Extensive evaluations were performed to assess the
performance of the proposed RAG system. The results
demonstrate that LQ-RAG consistently outperforms
baseline models, highlighting its applicability in the
legal domain.
The subsequent sections of this paper are structured
as follows. Section II provides background information.
Section III reviews pertinent literature. Section IV unveils

LQ-RAG is depicted in Algorithm 3.
Algor

KeyboardInterrupt: 

In [None]:
# Invoking the RAG Chain with the Query 
print("\n--- Invoking the RAG Chain ---")
query_text = "tell us about the LQ RAG?"


--- Invoking the RAG Chain ---


In [None]:

#.invoke() method will run the entire RAG pipeline
response = retrieval_chain.invoke({"input": query_text})

print(f"\nYour Question:\n{query_text}")
print(f"\nAI's Answer:\n{response['answer']}")


Your Question:
tell us about the LQ RAG?

AI's Answer:
Question: What are the different components of the proposed legal quotation generation system, including the fine-tuning layers and evaluation agents in the RAG layer? Answer:

Section III of the paper provides a detailed explanation for the various components of the proposed legal quotation generation system, including the fine-tuining layers and evaluation agents. The section introduces the Fine-Tuining (FT) layer, which involves fine-tuining both the embedding LLM and the generative LLM in a systematic way to optimize their performance. This is followed by the RAG layer, which includes advanced RAG modules, an evaluation agent, a prompt engineering agent, and a feedback mechanism to ensure the quality and accuracy of generated responses. The bottom left quadrant of the FT Layer illustrates human inputs, which are fed into the system to evaluate its performance and improve its capabilities.


In [None]:
# to inspect the retrieved documents 
print("\n--- Retrieved Documents ---")
for doc in response['context']:
    print(doc.page_content[:200], "...") # Print first 200 chars
    print("---")


--- Retrieved Documents ---
adaptation, and improved adherence to instructions.
4) Extensive evaluations were performed to assess the
performance of the proposed RAG system. The results
demonstrate that LQ-RAG consistently outpe ...
---
LQ-RAG is depicted in Algorithm 3.
Algorithm 1Embedding LLM Fine-Tuning Process
Constants: Loss function MNRL, Evaluator Eval,
Learning Rate η
Input: Csub-legal
Output: Trained LLM network parameters  ...
---
of 80%, showing a 23% improvement over Naive RAG and
a 14% improvement over RAG with FTM. This substantial
improvement is attributed to the advanced integration and
fine-tuning techniques in LQ-RAG, w ...
---
proposed LQ-RAG system. The proposed system is organized
into two primary parts: Fine-Tuning (FT) Layer and RAG
Layer. The FT Layer involves fine-tuning both the embedding
LLM and the generative LLM.  ...
---


In [10]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate #chat based prompts
from langchain_core.output_parsers import StrOutputParser #raw to usable format out

from langchain.chains.combine_documents import create_stuff_documents_chain 
#multiple chunks into single usable input 
from langchain.chains import create_retrieval_chain #to orchestrate entire rag pipeline

llm=OllamaLLM(model="tinyllama")
print("Ollama LLM Tinyllama initialized")


Ollama LLM Tinyllama initialized


In [11]:
rag_prompt=ChatPromptTemplate.from_messages([
    ("system","you are a helpful AI assistant. Use only the following provided  provided context to answer the question. If the answer is not in the context, clearly state 'I don't know' or 'Answer not in the provided document'.\n\nContext:\n{context}"),
    ("user", "{input}")
])
print("RAG PROMPT TEMPLATE DEFINED")

RAG PROMPT TEMPLATE DEFINED


In [12]:
# retriever from vector store and return top k matching results
# This object is what the retrieval chain will use to perform similarity searches
retriever = db.as_retriever() 
#method to transform vector store to retreiver
print("Retriever created from Chroma DB.")

Retriever created from Chroma DB.


In [13]:
# Create the Document Combination Chain
# This chain takes the retrieved documents and stuffs them into the LLM's context
document_chain = create_stuff_documents_chain(llm, rag_prompt)
print("Document combination chain created.")

# Creating the full RAG Retrieval Chain
# chain to orchestrates the retrieval and generation steps
retrieval_chain = create_retrieval_chain(retriever, document_chain)
print("Full RAG retrieval chain created.")

Document combination chain created.
Full RAG retrieval chain created.


In [14]:
# Invoking the RAG Chain with the Query 
print("\n--- Invoking the RAG Chain ---")
query_text = "tell us about the LQ RAG?"


--- Invoking the RAG Chain ---


In [17]:

#.invoke() method will run the entire RAG pipeline
response = retrieval_chain.invoke({"input": query_text})

print(f"\nYour Question:\n{query_text}")
print(f"\nAI's Answer:\n{response['answer']}")


Your Question:
tell us about the LQ RAG?

AI's Answer:
Question: What are the different components of the proposed legal quotation generation system, including the fine-tuning layers and evaluation agents in the RAG layer? Answer:

Section III of the paper provides a detailed explanation for the various components of the proposed legal quotation generation system, including the fine-tuining layers and evaluation agents. The section introduces the Fine-Tuining (FT) layer, which involves fine-tuining both the embedding LLM and the generative LLM in a systematic way to optimize their performance. This is followed by the RAG layer, which includes advanced RAG modules, an evaluation agent, a prompt engineering agent, and a feedback mechanism to ensure the quality and accuracy of generated responses. The bottom left quadrant of the FT Layer illustrates human inputs, which are fed into the system to evaluate its performance and improve its capabilities.


In [18]:
# to inspect the retrieved documents 
print("\n--- Retrieved Documents ---")
for doc in response['context']:
    print(doc.page_content[:200], "...") # Print first 200 chars
    print("---")


--- Retrieved Documents ---
adaptation, and improved adherence to instructions.
4) Extensive evaluations were performed to assess the
performance of the proposed RAG system. The results
demonstrate that LQ-RAG consistently outpe ...
---
LQ-RAG is depicted in Algorithm 3.
Algorithm 1Embedding LLM Fine-Tuning Process
Constants: Loss function MNRL, Evaluator Eval,
Learning Rate η
Input: Csub-legal
Output: Trained LLM network parameters  ...
---
of 80%, showing a 23% improvement over Naive RAG and
a 14% improvement over RAG with FTM. This substantial
improvement is attributed to the advanced integration and
fine-tuning techniques in LQ-RAG, w ...
---
proposed LQ-RAG system. The proposed system is organized
into two primary parts: Fine-Tuning (FT) Layer and RAG
Layer. The FT Layer involves fine-tuning both the embedding
LLM and the generative LLM.  ...
---
