In [None]:
!pip install PyPDF2

In [None]:

# FINAL PROJECT
# PDF Question-Answer


# Install Libraries
!pip install -qU langchain langgraph langchain-community pypdf faiss-cpu reportlab tiktoken > /dev/null


# IMPORT LIBRARIES

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import os
import re
# Import regex module

# CREATE / LOAD PDF
#
# The first run writes a one-page sample PDF of question/answer pairs with
# ReportLab; later runs find the file on disk and reuse it unchanged.

pdf_path = "my_questions.pdf"

if os.path.exists(pdf_path):
    print(f"📄 Using existing PDF: {pdf_path}")
else:
    # (question, answer) pairs written to the PDF as "Q: ..." / "A: ..." lines.
    qa_pairs = [
        (
            "What is Artificial Intelligence (AI)?",
            "Artificial Intelligence is the simulation of human intelligence processes by machines, especially computer systems.",
        ),
        (
            "What is Ohm’s Law?",
            "Ohm’s Law states that current through a conductor is directly proportional to the voltage across it at constant temperature: V = I × R.",
        ),
        (
            "Define Machine Learning.",
            "Machine Learning is a branch of AI that allows systems to automatically learn and improve from experience without being explicitly programmed.",
        ),
        (
            "What is an Algorithm?",
            "An algorithm is a step-by-step procedure or formula for solving a problem.",
        ),
        (
            "Define Newton’s Third Law.",
            "For every action, there is an equal and opposite reaction.",
        ),
    ]

    pdf_canvas = canvas.Canvas(pdf_path, pagesize=letter)

    # A single text object starting at (40, 730) holds the title and all pairs.
    text_block = pdf_canvas.beginText(40, 730)
    text_block.textLine("My Custom Question-Answer PDF")
    text_block.moveCursor(0, 20)

    for question, answer in qa_pairs:
        # Each pair is preceded by an empty line to separate it from the last.
        for line in ("", "Q: " + question, "A: " + answer):
            text_block.textLine(line)

    pdf_canvas.drawText(text_block)
    pdf_canvas.showPage()
    pdf_canvas.save()
    print(f"✅ Custom PDF created successfully: {pdf_path}")


# LOAD & SPLIT THE PDF

# Read the PDF into LangChain documents.
docs = PyPDFLoader(pdf_path).load()

# Cut the documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so they can be embedded and indexed below.
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
).split_documents(docs)

print(f"📘 Loaded '{pdf_path}' and split into {len(chunks)} chunks")

# CREATE EMBEDDINGS & FAISS STORE (Offline)

# Open-source HuggingFace embedding model; no API key is required.
# NOTE(review): the model presumably has to be cached locally or downloaded
# on first use -- confirm before relying on fully offline operation.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed every chunk and index the vectors in an in-memory FAISS store.
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

# k=1: each query gets back only the single most relevant chunk.
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

print(" Embeddings generated and FAISS vector store created (offline).")


# Typographic quotes are folded to ASCII so a typed "Ohm's" matches "Ohm’s".
_QUOTE_FOLD = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
})

# One "Q: <question> A: <answer>" pair. The answer runs until the next "Q:"
# marker or the end of the text. The \b guards stop words that merely end in
# "q:" or "a:" (e.g. "formula:") from being mistaken for markers.
_QA_PAIR_RE = re.compile(
    r'\bQ:\s*(.*?)\s*\bA:\s*(.*?)(?=\bQ:|\Z)',
    re.DOTALL | re.IGNORECASE,
)


def _normalize(text):
    """Fold quotes, collapse whitespace runs and case-fold *text*."""
    return " ".join(text.translate(_QUOTE_FOLD).split()).casefold()


def _find_answer(query, context):
    """Return the answer of the first Q/A pair whose question contains *query*.

    Returns None when the query is empty after normalisation or when no
    question in *context* contains it.
    """
    needle = _normalize(query)
    if needle.startswith("q:"):
        # Tolerate a query pasted together with its "Q:" marker.
        needle = needle[2:].lstrip()
    # Trailing punctuation is dropped so "What is X?" still matches a stored
    # question such as "What is X (abbr)?".
    needle = needle.rstrip(" ?.!")
    if not needle:
        # An empty needle is a substring of everything; never answer it.
        return None

    for pair in _QA_PAIR_RE.finditer(context):
        if needle in _normalize(pair.group(1)):
            return pair.group(2).strip()
    return None


def answer_query(query):
    """Answer *query* from the "Q: ... / A: ..." pairs in the indexed PDF.

    The module-level ``retriever`` supplies the most relevant chunk(s). Their
    text is parsed into question/answer pairs and the query is compared
    against the *question* text only, ignoring case, whitespace differences,
    curly vs. straight quotes and trailing punctuation. Restricting the match
    to questions prevents a query word that happens to occur inside an answer
    from returning the following, unrelated answer.

    Args:
        query: The user's question as free text.

    Returns:
        A dict with the keys ``"question"`` (the query as given),
        ``"answer"`` and ``"reference"``. ``"reference"`` is ``"None"`` when
        nothing was retrieved, ``"PDF"`` when an answer was extracted, and
        ``"PDF (Context Found)"`` when text was retrieved but no question in
        it matched the query.
    """
    retrieved = retriever.invoke(query)

    if not retrieved:
        return {
            "question": query,
            "answer": "No relevant information found in the PDF.",
            "reference": "None"
        }

    # Combine retrieved chunks into a single string
    context = " ".join(d.page_content.strip() for d in retrieved)

    answer_text = _find_answer(query, context)
    if answer_text is None:
        return {
            "question": query,
            "answer": "Could not find a specific answer for this question in the PDF.",
            "reference": "PDF (Context Found)"
        }

    return {
        "question": query,
        "answer": answer_text,
        "reference": "PDF"
    }


# Show a few sample questions, then read one question from the user.
example_lines = (
    "\n Try asking questions like:",
    " - What is Artificial Intelligence?",
    " - What is Ohm’s Law?",
    " - Define Machine Learning.",
    " - Define Newton’s Third Law.\n",
)
for example_line in example_lines:
    print(example_line)

user_query = input("Enter your question: ")
result = answer_query(user_query)


# Echo the result to the console between two separator rules.
console_lines = (
    "\n==============================",
    f"Question: {result['question']}",
    f"Answer: {result['answer']}",
    f"Reference: {result['reference']}",
    "==============================",
)
for console_line in console_lines:
    print(console_line)

# Persist the same three fields to a UTF-8 text file, overwriting any
# previous result.
with open("output_result.txt", "w", encoding="utf-8") as f:
    f.writelines([
        "Question: " + result["question"] + "\n\n",
        "Answer: " + result["answer"] + "\n\n",
        "Reference: " + result["reference"] + "\n",
    ])

print(" Result saved as 'output_result.txt' (download from Files panel).")