In [39]:
import sqlite3
import os
from dotenv import load_dotenv
from typing import TypedDict, List, Annotated, Dict, Any
from langgraph.graph import StateGraph, END
import uuid
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

load_dotenv()

# Database and Vector Store paths
DB_PATH = os.getenv("DB_PATH", "data/chat_history.db")
VECTOR_STORE_PATH = os.getenv("VECTOR_STORE_PATH", "data/chroma_db")
PROCESSED_FOLDER = "data/processed"
LLM_API_KEY = os.getenv("LLM_API_KEY")
LLM_MODEL = os.getenv("LLM_MODEL")
HISTORY_CONTEXT = 5
RETRIEVE_DOCS = 3

In [40]:
# Database Layer
class ChatDatabase:
    def __init__(self):
        self.conn = sqlite3.connect(DB_PATH, check_same_thread=False)
        self._create_table()

    def _create_table(self):
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS chat_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                user_message TEXT,
                bot_response TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)
        self.conn.commit()

    def save_chat(self, session_id: str, user_message: str, bot_response: str):
        self.conn.execute("""
            INSERT INTO chat_history (session_id, user_message, bot_response)
            VALUES (?, ?, ?)
        """, (session_id, user_message, bot_response))
        self.conn.commit()

    def get_chat_history(self, session_id: str, limit: int = HISTORY_CONTEXT) -> List[tuple]:
        cursor = self.conn.cursor()
        cursor.execute("""
            SELECT user_message, bot_response FROM chat_history
            WHERE session_id = ?
            ORDER BY timestamp DESC
            LIMIT ?
        """, (session_id, limit))
        return cursor.fetchall()[::-1]

In [41]:
# Document Retrieval Layer
class DocumentRetriever:
    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings()
        self.db = self._initialize_vectorstore()

    def _initialize_vectorstore(self):
        if not os.path.exists(PROCESSED_FOLDER):
            os.makedirs(PROCESSED_FOLDER)

        docs = []
        for file in os.listdir(PROCESSED_FOLDER):
            if file.endswith(".pdf"):
                loader = PyPDFLoader(os.path.join(PROCESSED_FOLDER, file))
                docs.extend(loader.load())

        if docs:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            texts = text_splitter.split_documents(docs)
            return Chroma.from_documents(texts, self.embeddings, persist_directory=VECTOR_STORE_PATH)
        
        return Chroma(persist_directory=VECTOR_STORE_PATH, embedding_function=self.embeddings)

    def retrieve_documents(self, query: str, k=RETRIEVE_DOCS) -> List[Any]:
        return self.db.similarity_search(query, k)


In [42]:
# LLM Response Generation
class ResponseGenerator:
    def __init__(self):
        self.llm = ChatGroq(api_key=LLM_API_KEY, 
                          model=LLM_MODEL)

    def generate_response(self, context: str, history: str, query: str) -> str:
        messages = [
            SystemMessage(content="You are an AI assistant. Provide accurate answers using the context. If unsure, say so."),
            HumanMessage(content=f"Context:\n{context}\n\nHistory:\n{history}\n\nQuestion: {query}\nAnswer:")
        ]
        return self.llm(messages).content


In [43]:
# LangGraph State Definition
class ChatState(TypedDict):
    session_id: str
    user_input: str
    retrieved_docs: List[Any]
    response: str
    history: Annotated[List[Dict[str, str]], lambda x, y: x + y]

In [44]:
# LangGraph Nodes
class ChatNodes:
    def __init__(self):
        self.db = ChatDatabase()
        self.retriever = DocumentRetriever()
        self.generator = ResponseGenerator()

    def retrieve_documents(self, state: ChatState) -> Dict:
        docs = self.retriever.retrieve_documents(state["user_input"])
        return {"retrieved_docs": docs}

    def generate_response(self, state: ChatState) -> Dict:
        context = "\n\n".join([doc.page_content for doc in state["retrieved_docs"]])
        history = "\n".join([f"User: {msg['user']}\nBot: {msg['bot']}" 
                           for msg in state.get("history", [])[-HISTORY_CONTEXT:]])
        
        response = self.generator.generate_response(
            context=context,
            history=history,
            query=state["user_input"]
        )
        return {"response": response}

    def save_conversation(self, state: ChatState) -> Dict:
        self.db.save_chat(
            state["session_id"],
            state["user_input"],
            state["response"]
        )
        return {
            "history": [{
                "user": state["user_input"],
                "bot": state["response"]
            }]
        }


In [45]:
# LangGraph Workflow Setup
def create_workflow():
    nodes = ChatNodes()
    workflow = StateGraph(ChatState)

    workflow.add_node("retrieve", nodes.retrieve_documents)
    workflow.add_node("generate", nodes.generate_response)
    workflow.add_node("save", nodes.save_conversation)

    workflow.set_entry_point("retrieve")
    workflow.add_edge("retrieve", "generate")
    workflow.add_edge("generate", "save")
    workflow.add_edge("save", END)

    return workflow.compile()


In [46]:
# Chat Interface
class LangGraphChat:
    def __init__(self):
        self.workflow = create_workflow()
        self.db = ChatDatabase()

    def chat(self, user_input: str, session_id: str = None) -> Dict:
        session_id = session_id or str(uuid.uuid4())
        history = self.db.get_chat_history(session_id, limit=HISTORY_CONTEXT)
        
        initial_state = {
            "session_id": session_id,
            "user_input": user_input,
            "retrieved_docs": [],
            "response": "",
            "history": [{"user": h[0], "bot": h[1]} for h in history]
        }

        result = self.workflow.invoke(initial_state)
        return {
            "session_id": session_id,
            "response": result["response"],
            "references": [{
                "content": doc.page_content,
                "metadata": doc.metadata
            } for doc in result["retrieved_docs"]]
        }
    


In [53]:
chatbot = LangGraphChat()
result = chatbot.chat("What is the refund policy?")
print(f"Response: {result['response']}")
print(f"Session ID: {result['session_id']}")
print(f"References: {result['references']}")

Response: Based on the provided context, the refund policy appears to be governed by Articles 4 and 5, which outline the requirements and process for refund claims. 

The refund policy is that where the refund claim is correct, the Bureau Expo 2020 Dubai makes a request to the Authority to refund the amount. 

However, more specific details about the refund policy, such as the types of refunds, eligibility criteria, and the refund process, are not explicitly stated in the provided context.
Session ID: dbcf2aaa-7bab-441f-a896-ca32248fb88c
References: [{'content': 'prepared by the Authority.  Where the refund claim is correct, the Bureau Expo \n2020 Dubai makes a request to the Authority to refund the amount. \n \nArticle 4 – Requirements for Refund \nThe Authority and Bureau Expo 2020 Dubai shall agree on procedural, evidential, \nand verification requirements which need to be met by the Office of the Official \nParticipant or any other Person to be eligible to the refund claim. \n \nAr

In [48]:
result

{'session_id': '329346c6-8425-494f-a14f-ddcb2a701648',
 'response': "Based on the provided context, it appears that the refund policy involves the following steps:\n\n1. The Bureau Expo 2020 Dubai makes a request to the Authority to refund the claimed amount.\n2. To be eligible for the refund, the Office of the Official Participant or any other Person must meet the agreed-upon procedural, evidential, and verification requirements.\n3. If the refund claim is correct, the Authority will provide a Certificate on Entitlement.\n\nHowever, I must note that I'm missing some crucial information, such as the specific details about the refund policy and the agreed-upon requirements. To provide a more accurate answer, I would need more information or context.",
 'references': [{'content': 'prepared by the Authority.  Where the refund claim is correct, the Bureau Expo \n2020 Dubai makes a request to the Authority to refund the amount. \n \nArticle 4 – Requirements for Refund \nThe Authority and Bu

In [49]:
# a3949cea-bfaf-4150-9acb-ffc2b1c522d0
response = chatbot.chat("Can you elaborate on point 3?", session_id="a3949cea-bfaf-4150-9acb-ffc2b1c522d0")

In [50]:
response

{'session_id': 'a3949cea-bfaf-4150-9acb-ffc2b1c522d0',
 'response': "Based on the provided context, point 3 states that where a Certificate of Entitlement has been granted to the Official Participant in respect of imports and supplies covered by Paragraphs (a) and (b) of Clause 1 of Article 2 of this Decision, the Official Participant is required to inform the Bureau.\n\nTo elaborate, the Certificate of Entitlement is a document issued by the Authority in response to the request made by the Bureau Expo 2020 Dubai. The Certificate serves as evidence of the entitlement to the refund, and it is typically prepared by the Authority after verifying the refund claim and meeting the agreed-upon requirements.\n\nThe Certificate of Entitlement would likely include information such as:\n\n- The name of the Official Participant\n- The amount of the refund\n- Any other relevant details\n\nThe Bureau Expo 2020 Dubai would then use this Certificate to support their request for a refund to the Authori

In [51]:
history = chatbot.db.get_chat_history("a3949cea-bfaf-4150-9acb-ffc2b1c522d0", limit=5)

In [52]:
history

[('What is the refund policy?',
  'Based on the provided context, the refund policy involves the following steps:\n\n1. The Bureau Expo 2020 Dubai makes a request to the Authority to refund the amount, provided the refund claim is correct.\n2. The Authority and Bureau Expo 2020 Dubai agree on procedural, evidential, and verification requirements to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim (Article 4: Requirements for Refund).\n3. Where the refund claim is correct, the Bureau Expo 2020 Dubai makes a request to the Authority to refund the amount, and the Authority prepares a Certificate on Entitlement (Article 5: Certificate on Entitlement).\n\nTherefore, the refund policy is that the Bureau Expo 2020 Dubai requests the Authority for a refund if the claim is correct, after meeting the agreed-upon requirements and obtaining the Certificate on Entitlement from the Authority.'),
 ('Can you elaborate on point 3?',
  "Based on th