In [2]:
import os
from langchain.prompts import ChatPromptTemplate
from models.utils import chat_llm
from pinecone import Pinecone
from models.utils import embeddings
from langchain_pinecone import PineconeVectorStore
from operator import itemgetter
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.output_parsers import StrOutputParser

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX")
pc = Pinecone(api_key=api_key)
index = pc.Index(index_name)
embedding_fn = embeddings()
vector_store = PineconeVectorStore(index=index, embedding=embedding_fn, namespace='pdf/physics.pdf')

In [4]:
llm = chat_llm()

In [52]:
retriever=vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={'k': 3, 'lambda_mult': 0.25}
        )
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)

In [53]:
retriever_from_llm.invoke({"question": "what is cubical expansivity?"})

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Definition of cubical thermal expansion', '2. What causes volume expansion in materials?', '3. How does temperature affect the volume of a solid?']


[Document(metadata={'page': 4.0, 'question': '7. 2071 Set D Q.No. 2 a', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='7. 2071 Set D Q.No. 2 a Define the coefficient of cubical expansion of a solid and hence, write an expression for the variation of its density with temperature.\n$2 a$ Coefficient of cubical expansion of solid is defined as the change in volume per unit original volume per unit rise in temperature.'),
 Document(metadata={'page': 37.0, 'question': '16. 2076 GIE Set A Q.No. 6b', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='16. 2076 GIE Set A Q.No. 6b Define thermal conductivity of substance. Deduce an expression for the thermal conductivity of a good conductor in steady state.\n17. Thermal Conductivity: The thermal conductivity of material of a body is defined as the amount of heat that flows in one second across the opposite faces of a unit cube, whose opposite faces are kept at a temperature difference of 1 K . The coefficient of thermal c

In [54]:
# Set logging for the queries
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [55]:
mutli_query = retriever_from_llm.invoke({"question": "what is cubical expansivity?"})
mutli_query

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Definition of cubical thermal expansion', '2. What causes volume expansion in materials?', '3. How does temperature affect the volume of a solid?']


[Document(metadata={'page': 4.0, 'question': '7. 2071 Set D Q.No. 2 a', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='7. 2071 Set D Q.No. 2 a Define the coefficient of cubical expansion of a solid and hence, write an expression for the variation of its density with temperature.\n$2 a$ Coefficient of cubical expansion of solid is defined as the change in volume per unit original volume per unit rise in temperature.'),
 Document(metadata={'page': 37.0, 'question': '16. 2076 GIE Set A Q.No. 6b', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='16. 2076 GIE Set A Q.No. 6b Define thermal conductivity of substance. Deduce an expression for the thermal conductivity of a good conductor in steady state.\n17. Thermal Conductivity: The thermal conductivity of material of a body is defined as the amount of heat that flows in one second across the opposite faces of a unit cube, whose opposite faces are kept at a temperature difference of 1 K . The coefficient of thermal c

In [58]:
from langchain_core.documents import Document
from langchain.load import dumps, loads

def rrf(documents=list[Document], k:int = 60):
    """Reciprocal_rank_fusion that takes multiple lists of ranked documents and an optional parameter k used in the RRF formula"""
    scores = {}
    for rank, docs in enumerate(documents):
        doc_str = dumps(docs)
        if doc_str not in scores:
            scores[doc_str] = 0
        previous_score = scores[doc_str]
        scores[doc_str] += 1/(rank + k)

        reranked_results = []
        for doc, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
            reranked_results.append((loads(doc), score))
    return reranked_results

In [59]:
retrieval_chain_rag_fusion = retriever_from_llm | rrf
docs = retrieval_chain_rag_fusion.invoke({"question": "what is cubical expansivity?"})

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Definition of cubical thermal expansion', '2. What causes volume expansion in materials?', '3. How does temperature affect the volume of a solid?']


In [60]:
docs

[(Document(metadata={'page': 4.0, 'question': '7. 2071 Set D Q.No. 2 a', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='7. 2071 Set D Q.No. 2 a Define the coefficient of cubical expansion of a solid and hence, write an expression for the variation of its density with temperature.\n$2 a$ Coefficient of cubical expansion of solid is defined as the change in volume per unit original volume per unit rise in temperature.'),
  0.016666666666666666),
 (Document(metadata={'page': 37.0, 'question': '16. 2076 GIE Set A Q.No. 6b', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='16. 2076 GIE Set A Q.No. 6b Define thermal conductivity of substance. Deduce an expression for the thermal conductivity of a good conductor in steady state.\n17. Thermal Conductivity: The thermal conductivity of material of a body is defined as the amount of heat that flows in one second across the opposite faces of a unit cube, whose opposite faces are kept at a temperature difference of 1 K . T

In [None]:
chain = (
    {
        "context": retriever,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
    # | (lambda x: x[].replace("\\\\", "\\"))
)
answer = chain.invoke({"question": user_query})


In [14]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [15]:
from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_rag_fusion 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [21]:
generate_queries.invoke({"question": "what is cubical expansivity?"})

['1.  Cubical expansivity definition',
 '2.  Properties of cubical expansivity',
 '3.  Examples of cubical expansivity',
 '4.  Cubical expansivity vs other expansivity types']

In [16]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
question = "what is cubical expansivity?"
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

  (loads(doc), score)


6

In [17]:
docs

[(Document(metadata={'page': 5.0, 'question': '10. 2070 Set D Q.No. 2 a', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='10. 2070 Set D Q.No. 2 a Does the cubical expansivity of a liquid depend on its original volume? Explain.\n$>$ No, the cubical expansivity of a liquid does not depend upon original volume of liquid.\nThe cubical expansitivity of a liquid is given as, $\\gamma=\\frac{\\Delta V}{V\\left(\\theta_{2}-\\theta_{1}\\right)}$\nThe ratio of $\\frac{\\Delta V}{V}$ is same in each case for a liquid so, the coefficient of cubical expansion is same for all'),
  0.06666666666666667),
 (Document(metadata={'page': 12.0, 'question': '41. 2056 Q.No. 5 a', 'reference': '2075 GIE Q.No. 6a', 'source': 'pdf/physics.pdf'}, page_content='41. 2056 Q.No. 5 a Define linear and cubical expansivities. Derive a relation between them.\n$[1+1+3]$\n$\\Rightarrow$ Please refer to 2075 GIE Q.No. 6a'),
  0.048651507139079855),
 (Document(metadata={'page': 17.0, 'question': '67. 2068 Old Q

In [18]:
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Based on the provided documents, cubical expansivity is defined in the context of a liquid as:\n\n$\\\\gamma=\\\\frac{\\\\Delta V}{V\\\\left(\\\\theta_{2}-\\\\theta_{1}\\\\right)}$\n\nWhere:\n\n*   $\\\\gamma$ represents the cubical expansivity.\n*   $\\\\Delta V$ is the change in volume.\n*   $V$ is the original volume.\n*   $\\\\theta_{2}-\\\\theta_{1}$ is the change in temperature.\n\nAdditionally, one document mentions the need to "Define linear and cubical expansivities. Derive a relation between them" and refers to another document for the answer.'

In [1]:
from indexing.load_file import ocr_image

text = ocr_image("/Users/xaxm/Desktop/IMG_20250623_172644.jpg")

In [2]:
text

'What is Cubical expanciity?'

In [4]:
import json
langchains=[]
json.loads(json.dumps((langchains)))

[]

In [2]:
from langchain import hub
from operator import itemgetter
from models.utils import chat_llm
from langchain.chains import LLMChain
from pinecone_db.pinecone_client import load_pinecone
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain.chains.combine_documents import create_stuff_documents_chain

def rag_chain(user_query: str, chat_history: list):
    """RAG chain"""

    # Initialize Pinecone database
    vector_store = load_pinecone()

    # # Prompt
    # template = """Answer the question based only on the following context"""
    # prompt = ChatPromptTemplate.from_template(template)

    rephrase_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """Given a chat history and the latest user question which might reference context in the chat history, 
            formulate a standalone question which can be understood without the chat history. Do NOT answer the question, 
            just reformulate it if needed and otherwise return it as is."""),
            MessagesPlaceholder(variable_name="history", optional=True),
            ("human", "{input}"),
        ]
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "Answer the question based only on the following context and chat history"),
            ("system", "Context: {context}"),
            MessagesPlaceholder(variable_name="history", optional=True),
            ("human", "{input}"),
        ]
    )

    # Initialize ChatModel
    llm = chat_llm()
    retriever = vector_store.as_retriever()
    
    chat_retriever_chain = create_history_aware_retriever(llm, retriever, rephrase_prompt)
    doc_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(chat_retriever_chain, doc_chain)

    # chain = (
    #     {
    #         "context": retriever,
    #         "question": RunnablePassthrough()
    #     }
    #     | prompt
    #     | llm
    #     | StrOutputParser()
    # )

    answer = chain.invoke({
        "input": user_query, 
        "history": chat_history
    })
    return answer

In [3]:
op = rag_chain("what is cubical expansivity?", [])

In [4]:
op

{'input': 'what is cubical expansivity?',
 'history': [],
 'context': [Document(id='32. 2071 Set C Q.No. 6 c', metadata={'page': 9.0, 'question': '32. 2071 Set C Q.No. 6 c', 'reference': '', 'source': 'pdf/physics.pdf'}, page_content='32. 2071 Set C Q.No. 6 c Does cubical expansivity depend upon the initial volume of a solid? Write the unit of this expansivity. Also derive its relation with superficial expansivity.\n$>$ No, the cubical expansivity of a solid does not depend upon initial volume of solid.\nThe cubical expansitivity of a solid is given as\n$\\gamma=\\frac{\\Delta \\mathrm{V}}{\\mathrm{V}\\left(\\theta_{2}-\\theta_{1}\\right)}$\nThe ratio of $\\frac{\\Delta \\mathrm{V}}{\\mathrm{V}}$ is same in each case for a solid so, the coefficient of cubical expansion is same for all volume of a solid and does not depend on the volume. The unit of this expansivity is per kelvin ( $\\mathrm{K}^{-1}$ ). Relation between $\\beta$ and $\\gamma$ : Let us consider a cube whose sides having 

In [None]:
sources = []
for doc in op["context"]:
    sources.append(
        {
            "source": doc.metadata.get("source", "Unknown"),
            "page no.": doc.metdata.get("page", "N/A"),
            "question": doc.metadata.get("question", ""),
            "reference": doc.metadata.get(re)
        }
    )

In [5]:
metadata_list = [doc.metadata for doc in op["context"]]

In [6]:
metadata_list

[{'page': 9.0,
  'question': '32. 2071 Set C Q.No. 6 c',
  'reference': '',
  'source': 'pdf/physics.pdf'},
 {'page': 5.0,
  'question': '10. 2070 Set D Q.No. 2 a',
  'reference': '',
  'source': 'pdf/physics.pdf'},
 {'page': 7.0,
  'question': '26. 2075 GIE Q.No. 6a',
  'reference': '',
  'source': 'pdf/physics.pdf'},
 {'page': 4.0,
  'question': '7. 2071 Set D Q.No. 2 a',
  'reference': '',
  'source': 'pdf/physics.pdf'}]