In [None]:
import sqlite3
import os
from dotenv import load_dotenv

load_dotenv()
DB_PATH = os.getenv("DB_PATH", "data/chat_history.db")

class ChatDatabase:
    def __init__(self):
        self.conn = sqlite3.connect(DB_PATH, check_same_thread=False)
        self.cursor = self.conn.cursor()
        self._create_table()

    def _create_table(self):
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS chat_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                user_message TEXT,
                bot_response TEXT,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)
        self.conn.commit()

    def save_chat(self, session_id, user_message, bot_response):
        self.cursor.execute("""
            INSERT INTO chat_history (session_id, user_message, bot_response)
            VALUES (?, ?, ?)
        """, (session_id, user_message, bot_response))
        self.conn.commit()

    def get_chat_history(self, session_id, limit=100):
        """Retrieves the last 'limit' messages from the session."""
        self.cursor.execute("""
            SELECT user_message, bot_response FROM chat_history
            WHERE session_id = ?
            ORDER BY timestamp DESC
            LIMIT ?
        """, (session_id, limit))
        return self.cursor.fetchall()[::-1]  # Reverse to maintain chronological order

    def get_all_sessions(self):
        self.cursor.execute("""
            SELECT DISTINCT session_id FROM chat_history ORDER BY timestamp DESC
        """)
        return [row[0] for row in self.cursor.fetchall()]

In [61]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
import os

load_dotenv()
LLM_API_KEY = os.getenv("LLM_API_KEY")
LLM_MODEL = os.getenv("LLM_MODEL", "default-llm")

class ResponseGenerator:
    def __init__(self):
        self.llm = ChatGroq(api_key=LLM_API_KEY, model=LLM_MODEL)

    def generate_response(self, query, retrieved_docs, history):
        context = "\n\n".join([doc.page_content for doc in retrieved_docs])

        history_context = "\n".join([f"User: {h[0]}\nBot: {h[1]}" for h in history])
        
        messages = [
            SystemMessage(content="You are an AI assistant trained to provide accurate answers based on context. If the provided context is not enough, respond with 'I don't have enough information to answer this question.'"),
            HumanMessage(content=f"Context:\n{context}\n\nHistory:\n{history_context}\n\nQuestion: {query}\nAnswer:")
        ]

        return self.llm(messages)


In [62]:
import os
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv

load_dotenv()
VECTOR_STORE_PATH = os.getenv("VECTOR_STORE_PATH", "data/chroma_db")
PROCESSED_FOLDER = "data/processed"

class DocumentRetriever:
    def __init__(self):
        self.db = None
        self.embeddings = HuggingFaceEmbeddings()
        self.load_database()

    def load_database(self):
        if not os.path.exists(PROCESSED_FOLDER):
            os.makedirs(PROCESSED_FOLDER)

        print("Loading processed documents...")
        docs = []
        for file in os.listdir(PROCESSED_FOLDER):
            if file.endswith(".pdf"):
                loader = PyPDFLoader(os.path.join(PROCESSED_FOLDER, file))
                docs.extend(loader.load())

        print(f"Loaded {len(docs)} processed documents.")
        if docs:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            texts = text_splitter.split_documents(docs)

            if not os.path.exists(VECTOR_STORE_PATH):
                os.makedirs(VECTOR_STORE_PATH)

            self.db = Chroma.from_documents(texts, self.embeddings, persist_directory=VECTOR_STORE_PATH)
            self.db.persist()
        else:
            self.db = Chroma(persist_directory=VECTOR_STORE_PATH, embedding_function=self.embeddings)

    def retrieve_documents(self, query):
        return self.db.similarity_search(query, k=3)

In [63]:
import uuid
from langchain.memory import ConversationBufferWindowMemory

class Chatbot:
    def __init__(self, session_id=None):
        self.db = ChatDatabase()
        self.retriever = DocumentRetriever()
        self.generator = ResponseGenerator()

        self.session_id = session_id if session_id else str(uuid.uuid4())

        # LangChain memory: Store last 5 exchanges per session
        self.memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history", return_messages=True)

        # Load existing history into memory
        history = self.db.get_chat_history(self.session_id)
        for user_msg, bot_msg in history:
            self.memory.chat_memory.add_user_message(user_msg)
            self.memory.chat_memory.add_ai_message(bot_msg)

    def chat(self, user_input):
        """Handles a single chat interaction with memory."""
        docs = self.retriever.retrieve_documents(user_input)

        # Retrieve last 5 conversations
        history = self.db.get_chat_history(self.session_id, limit=5)

        response = self.generator.generate_response(user_input, docs, history)
        response_text = response.content

        # Save chat to DB and Memory
        self.db.save_chat(self.session_id, user_input, response_text)
        self.memory.chat_memory.add_user_message(user_input)
        self.memory.chat_memory.add_ai_message(response_text)

        return {
            "query": user_input,
            "response": response_text,
            "reference": [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
        }

    def get_history(self):
        return self.db.get_chat_history(self.session_id)

    def get_all_sessions(self):
        return self.db.get_all_sessions()

In [64]:
bot = Chatbot()

Loading processed documents...
Loaded 5 processed documents.


  self.memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history", return_messages=True)


In [65]:
query = "What is the policy for refunds?"
result = bot.chat(query)

In [66]:
result

{'query': 'What is the policy for refunds?',
 'response': 'Based on the provided context, the policy for refunds is as follows:\n\n1. The Bureau Expo 2020 Dubai will prepare a refund claim and request the Authority to refund the amount if the claim is correct.\n\n2. The Authority and Bureau Expo 2020 Dubai must agree on the procedural, evidential, and verification requirements that need to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim.\n\n3. A Certificate on Entitlement will be prepared by the Authority.\n\nThese requirements outline the process for issuing refunds, which involves agreement between the Bureau Expo 2020 Dubai and the Authority, as well as the completion of specific requirements and the issuance of a Certificate on Entitlement.',
 'reference': [{'content': 'prepared by the Authority.  Where the refund claim is correct, the Bureau Expo \n2020 Dubai makes a request to the Authority to refund the amount. \n \nArticl

In [67]:
bot.get_history()

[('What is the policy for refunds?',
  'Based on the provided context, the policy for refunds is as follows:\n\n1. The Bureau Expo 2020 Dubai will prepare a refund claim and request the Authority to refund the amount if the claim is correct.\n\n2. The Authority and Bureau Expo 2020 Dubai must agree on the procedural, evidential, and verification requirements that need to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim.\n\n3. A Certificate on Entitlement will be prepared by the Authority.\n\nThese requirements outline the process for issuing refunds, which involves agreement between the Bureau Expo 2020 Dubai and the Authority, as well as the completion of specific requirements and the issuance of a Certificate on Entitlement.')]

In [68]:
result2 = bot.chat("What is mentioned related to power of authority?")

In [69]:
result2

{'query': 'What is mentioned related to power of authority?',
 'response': 'Based on the provided context, the following is mentioned related to the power of the authority:\n\n1. The Authority has the power to refund the amount if the claim is correct, as per Article 1 of Cabinet Decision No. 1 of 2020.\n\n2. The Authority and Bureau Expo 2020 Dubai must agree on the procedural, evidential, and verification requirements that need to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim. This indicates that the Authority has the power to set conditions for refund eligibility.\n\n3. The Authority is responsible for preparing a Certificate on Entitlement, which suggests that the Authority has the power to issue certificates that may be required for refund claims.',
 'reference': [{'content': '2 Cabinet Decision No. 1 of 2020 - Issue Date: 2 January 2020 – Unofficial translation \n \n \nArticle 1 - Definitions \nIn the application of the p

In [70]:
bot.get_history()

[('What is mentioned related to power of authority?',
  'Based on the provided context, the following is mentioned related to the power of the authority:\n\n1. The Authority has the power to refund the amount if the claim is correct, as per Article 1 of Cabinet Decision No. 1 of 2020.\n\n2. The Authority and Bureau Expo 2020 Dubai must agree on the procedural, evidential, and verification requirements that need to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim. This indicates that the Authority has the power to set conditions for refund eligibility.\n\n3. The Authority is responsible for preparing a Certificate on Entitlement, which suggests that the Authority has the power to issue certificates that may be required for refund claims.'),
 ('What is the policy for refunds?',
  'Based on the provided context, the policy for refunds is as follows:\n\n1. The Bureau Expo 2020 Dubai will prepare a refund claim and request the Authorit

In [71]:
bot.get_all_sessions()

['76d7de9d-6a2e-469e-bae4-852651cc13f0',
 '60a905fd-d19c-4bde-b31b-1a2cb93c078f',
 '0bcc2d36-b510-4863-b1f5-e59ec5631910',
 'cd176620-8ef5-4741-a2da-aaa1d1b2f57c',
 'f02f9ae4-90f8-4e5e-9204-2d11a8b62e03',
 'b28eb6ed-fa2a-4e68-8bf2-471760fea780',
 '95160a07-fc11-4c97-9115-a968d65b010b']

In [72]:
result3 = bot.chat("what was the last question?")

In [73]:
result3

{'query': 'what was the last question?',
 'response': 'The last question was: What was the last question?',
 'reference': [{'content': '4 Cabinet Decision No. 1 of 2020 - Issue Date: 2 January 2020 – Unofficial translation \n \n \n \nArticle 3 – Application for Refund \n1. An application to refund the Tax stipulated in Clauses 1 and 2 of Article 2 is \nsubmitted to the Bureau Expo 2020 Dubai. \n2. The Bureau Expo 2020 Dubai performs the initial check on the refund claims \nreceived from the Office of the Official Participants in accordance with the forms',
   'metadata': {'page': 3,
    'page_label': '4',
    'source': 'data/processed\\279926_cabinet_decision_1_2020__refund_of_vat_paid_on_goods_and_services_connected_with_expo_2020.pdf'}},
  {'content': '4 Cabinet Decision No. 1 of 2020 - Issue Date: 2 January 2020 – Unofficial translation \n \n \n \nArticle 3 – Application for Refund \n1. An application to refund the Tax stipulated in Clauses 1 and 2 of Article 2 is \nsubmitted to the

In [74]:
bot.memory

ConversationBufferWindowMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='What is the policy for refunds?', additional_kwargs={}, response_metadata={}), AIMessage(content='Based on the provided context, the policy for refunds is as follows:\n\n1. The Bureau Expo 2020 Dubai will prepare a refund claim and request the Authority to refund the amount if the claim is correct.\n\n2. The Authority and Bureau Expo 2020 Dubai must agree on the procedural, evidential, and verification requirements that need to be met by the Office of the Official Participant or any other Person to be eligible for the refund claim.\n\n3. A Certificate on Entitlement will be prepared by the Authority.\n\nThese requirements outline the process for issuing refunds, which involves agreement between the Bureau Expo 2020 Dubai and the Authority, as well as the completion of specific requirements and the issuance of a Certificate on Entitlement.', additional_kwargs={}, response_metadata={}), 