Import Required Modules

In [1]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

import os
import time
from dotenv import load_dotenv

Load Environment Variables

In [2]:
load_dotenv()

# Load API keys.
# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raises TypeError whenever HF_TOKEN is not configured.
# Re-export the token only when it is actually present.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token
else:
    print("Warning: HF_TOKEN is not set; Hugging Face downloads will be unauthenticated.")

# The Groq key is handed to the chat model when it is constructed.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    print("Warning: GROQ_API_KEY is not set; requests to the Groq LLM will fail.")

Initialize Embeddings and LLM

In [3]:
# Initialize the embeddings model: a HuggingFace embedding wrapper configured
# with the "all-MiniLM-L6-v2" model name. It is handed to FAISS when the
# vector store is built from the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Initialize the LLM: a Groq chat model created with the API key read from the
# environment in the previous cell.
# NOTE(review): hosted model ids are retired over time - confirm that
# "llama3-70b-8192" is still served before relying on this cell.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")

  from .autonotebook import tqdm as notebook_tqdm


Session History Setup

In [4]:
# Identifier under which this notebook's conversation is stored.
session_id = "default_session"
# In-memory mapping of session id -> chat history object.
store = {}

def get_session_history(session: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session``.

    The first request for an unknown session id creates an empty
    ``ChatMessageHistory``, records it in ``store`` and returns it; later
    requests return that same object.
    """
    try:
        return store[session]
    except KeyError:
        fresh_history = ChatMessageHistory()
        store[session] = fresh_history
        return fresh_history

Load PDF Documents

In [5]:
def load_documents(pdf_paths):
    """Load every readable PDF in ``pdf_paths`` and return the combined pages.

    Args:
        pdf_paths: Sequence of file-system paths (strings). Surrounding
            whitespace on each entry is ignored.

    Returns:
        A flat list with the documents produced by ``PyPDFLoader`` for each
        PDF that loaded successfully, in input order. Entries that are not
        regular files, do not have a ``.pdf`` extension (any letter case), or
        fail to parse are reported on stdout and skipped.
    """
    documents = []
    successful_loads = 0
    for raw_path in pdf_paths:
        path = raw_path.strip()
        # isfile() rejects directories that merely end in ".pdf"; the
        # lower-cased suffix check accepts "REPORT.PDF" as well as "report.pdf".
        if not (os.path.isfile(path) and path.lower().endswith(".pdf")):
            print(f"Error: File not found or not a PDF: {path}")
            continue
        try:
            docs = PyPDFLoader(path).load()
        except Exception as exc:
            # One corrupt or unreadable file must not discard the others.
            print(f"Error: Could not load {path}: {exc}")
            continue
        documents.extend(docs)
        successful_loads += 1
        print(f"Successfully loaded {path} with {len(docs)} pages")
    print(f"\nSuccessfully loaded {successful_loads} out of {len(pdf_paths)} PDFs")
    print(f"Total pages loaded: {len(documents)}")
    return documents

Enter the PDF File Paths (comma-separated)

In [6]:
# Read one line holding one or more comma-separated PDF paths.
pdf_path_input = input("Enter PDF file paths: ")
# Trim each comma-separated piece and drop the ones that end up empty.
pdf_path_list = [
    candidate
    for candidate in (piece.strip() for piece in pdf_path_input.split(','))
    if candidate
]
# Load the pages of every PDF that can be read.
documents = load_documents(pdf_path_list)

Successfully loaded C:\Users\Administrator\Downloads\PDF_Question_Answer_Chatbot\NIPS-2017-attention-is-all-you-need-Paper.pdf with 11 pages

Successfully loaded 1 out of 1 PDFs
Total pages loaded: 11


In [7]:
# Split Documents into chunks
# The splitter is configured with a chunk size of 500 and an overlap of 75
# (presumably measured in characters - confirm against the splitter's length
# function). The separators are listed from coarsest to finest: blank line,
# newline, period, space, and finally the empty string.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=75,
    separators=["\n\n", "\n", ".", " ", ""]
)
# Apply the splitter to the loaded PDF pages and report how many chunks resulted.
splits = text_splitter.split_documents(documents)
print(f"Created {len(splits)} document chunks")

Created 78 document chunks


In [8]:
# Vectorstore to store vector embeddings using FAISS
# Built from the document chunks, using the HuggingFace embeddings model
# created earlier in the notebook.
vectorstore = FAISS.from_documents(splits, embedding=embeddings)

In [9]:
# Create Retriever
# Exposes the FAISS vector store as a retriever configured for similarity
# search with k=5 (presumably the five closest chunks per query - confirm
# against the vector store documentation).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)

In [10]:
# Prompt for Contextualized Query
# This prompt is given to the history-aware retriever, which uses the model's
# reply as the search query. The system message must therefore ask explicitly
# for a standalone reformulation of the question and forbid answering it;
# otherwise follow-up questions are searched with an arbitrary reply.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", (
        "Given a chat history and the latest user question which might reference "
        "context in the chat history, formulate a standalone question which can be "
        "understood without the chat history. Do NOT answer the question, just "
        "reformulate it if needed and otherwise return it as is."
    )),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

In [11]:
# History-Aware Retriever
# Combines the LLM, the base FAISS retriever and the contextualization prompt
# so that retrieval can take the chat history into account.
# NOTE(review): presumably the LLM rewrites the question only when the chat
# history is non-empty - confirm against the LangChain documentation.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt
)

In [12]:
# Question-Answer Prompt
# System message: role description, a {context} slot for the retrieved text,
# and numbered answering instructions. It is followed by the running chat
# history and the user's latest question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant answering questions about documents. "
            "Use the following retrieved context to answer the user's question. "
            "\n\n{context}\n\n"
            "Instructions:\n"
            "1. If context is enough, answer.\n"
            "2. If not, say so...\n"
            "3. Ask for clarification if unclear.\n",
        ),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [13]:
# Question-Answer Chain
# Built from the LLM and qa_prompt. Presumably the retrieved documents are
# inserted ("stuffed") into the prompt's {context} slot - confirm against the
# LangChain documentation.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

In [14]:
# Combine Retriever and QA Chain into a RAG Chain
# The history-aware retriever supplies the documents and question_answer_chain
# produces the reply; the chat loop later reads that reply from the "answer"
# key of the result.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [15]:
# Combine/wrap up rag QA chain with Message History Handling
# get_session_history is the factory defined earlier: given a session id it
# returns that session's ChatMessageHistory, creating it on first use.
# The key names line up with the rest of the notebook:
#   - "input" is the {input} slot used by both prompts,
#   - "chat_history" is the MessagesPlaceholder name used by both prompts,
#   - "answer" is the result key the chat loop prints.
# NOTE(review): presumably the wrapper appends each question and answer to the
# session history after every call - confirm against the LangChain docs.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer"
)

Ask Questions and Get Responses Based on the External Knowledge Source (PDF)

In [16]:
# Interactive question/answer loop: read a question, run it through the
# history-aware RAG chain for the current session, and print the answer.
# Typing "exit" or "quit" ends the loop.
while True:
    try:
        user_input = input("\nYour question (type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # input() raises EOFError when stdin is closed and KeyboardInterrupt
        # when the kernel is interrupted; treat both as a request to stop
        # instead of surfacing a traceback.
        print("\nGoodbye!")
        break
    if user_input.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break
    if not user_input:
        print("Please enter a question.")
        continue

    try:
        print("Generating response...")
        response = conversational_rag_chain.invoke(
            {"input": user_input},
            # The session id selects which stored chat history is used.
            config={"configurable": {"session_id": session_id}},
        )
        print("\nAssistant:", response["answer"])
    except Exception as e:
        # Report the failure and keep the session alive for the next question.
        print(f"Error: {e}")

Generating response...

Assistant: Based on the provided context, the Transformer refers to a specific neural network architecture, particularly in the field of natural language processing. It's a type of sequence-to-sequence model that's primarily used for machine translation tasks, such as English-to-German translation.

The Transformer architecture, as shown in Figure 1, consists of an encoder and a decoder. It uses self-attention mechanisms to relate signals from different input or output positions, allowing it to model complex dependencies between distant positions in a sequence.

The key innovations of the Transformer architecture include:

1. **Multi-Head Attention**: This allows the model to attend to different aspects of the input sequence simultaneously, improving its ability to capture complex relationships.

2. **Self-Attention**: This mechanism enables the model to relate signals from different positions in the input sequence, reducing the computational complexity of model