In [None]:
import os
import streamlit as st
from src.pdf_processor import read_pdf
from src.embeddings import build_embeddings_and_chunks
from src.vectorstore import VectorStore
from src.retriever import Retriever

# Streamlit page: index a PDF into a local vector store, then answer questions
# against that store. Streamlit re-runs this script top to bottom on every
# interaction, so each section below must tolerate being executed repeatedly.

# Directory name this script checks for / loads the saved index from.
INDEX_DIR = "faiss_vector_store"
# Pre-filled value for the path box; the user can overwrite it in the UI.
DEFAULT_PDF_PATH = "C:/Users/chven/OneDrive/Documents/aaa_Books/Hands on machine learing book.pdf"

st.title("PDF Question Answering")

# Point to your PDF
pdf_path = st.text_input("PDF path", DEFAULT_PDF_PATH)

# Load / build index
if st.button("Index PDF"):
    try:
        # A spinner disappears when the work finishes, unlike a persistent
        # info banner, and matches how the retrieval step reports progress.
        with st.spinner("Reading PDF and building index (this may take a while)..."):
            text = read_pdf(pdf_path)
            chunks = build_embeddings_and_chunks(text)  # returns list of chunks
            # Per the original note this call creates & saves the local FAISS
            # index. Its return value is not kept: the store is (re)loaded
            # from disk just below on every run.
            VectorStore.from_texts(chunks)
    except FileNotFoundError:
        # Show a readable message instead of a traceback for a mistyped path.
        st.error(f"PDF not found: {pdf_path}")
    else:
        st.success(f"Index created and saved ({INDEX_DIR}).")

# Load existing vector store
vs = None
if os.path.exists(INDEX_DIR):
    try:
        vs = VectorStore.load_local(INDEX_DIR)
    except Exception as exc:
        # Still best-effort (vs stays None), but tell the user why the index
        # on disk was not usable rather than pretending it does not exist.
        st.warning(f"Found '{INDEX_DIR}' but could not load it: {exc}")

question = st.text_input("Enter your question")

if question:
    # Compare against None explicitly so a loaded store is never mistaken for
    # "missing" just because the object happens to evaluate as falsy.
    if vs is None:
        st.error("No vector store found. Click 'Index PDF' first.")
    else:
        retriever = Retriever(vs)
        with st.spinner("Retrieving and generating answer..."):
            answer = retriever.retrieve_answer(question)
        st.subheader("Answer")
        st.write(answer)

ImportError: cannot import name 'build_embeddings_and_chunks' from 'src.embeddings' (c:\Users\chven\OneDrive\Documents\GitHub\RAG_project\streamlit-rag-app\src\embeddings.py)

In [None]:
from pathlib import Path
from PyPDF2 import PdfReader

def read_pdf(path):
    """Return the text of every page of the PDF at ``path`` as one string.

    Args:
        path: Location of the PDF file (anything ``pathlib.Path`` accepts).

    Returns:
        The extracted text of each page, in page order, separated by a blank
        line. A page for which extraction yields nothing contributes an
        empty string.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    pdf_file = Path(path)
    if pdf_file.exists():
        page_texts = []
        for page in PdfReader(str(pdf_file)).pages:
            # extract_text() may return None/empty; normalise that to "".
            page_texts.append(page.extract_text() or "")
        return "\n\n".join(page_texts)
    raise FileNotFoundError(path)

In [None]:
# Load PDF and create embeddings
# Ask for the PDF location, extract its text, embed it, and wrap the result in
# a VectorStore that the question-answering cell below passes to Retriever.
# NOTE(review): this text_input uses the same label and default as the
# "PDF path" input in the first cell; if both end up in one Streamlit script
# the duplicate widget may need a distinct `key` — TODO confirm.
pdf_path = st.text_input("PDF path", "C:/Users/chven/OneDrive/Documents/aaa_Books/Hands on machine learing book.pdf")
pdf_text = read_pdf(pdf_path)
# NOTE(review): create_embeddings is neither imported nor defined in the cells
# visible here, so on a fresh kernel this line would raise NameError unless it
# is provided elsewhere — TODO confirm which src.embeddings helper is intended.
embeddings = create_embeddings(pdf_text)
# NOTE(review): the first cell builds/loads the store via
# VectorStore.from_texts / VectorStore.load_local, whereas this calls the
# constructor directly — verify that VectorStore accepts embeddings here.
vector_store = VectorStore(embeddings)

# Streamlit Application for PDF Question Answering

In [None]:
# Set up Streamlit interface: page title plus a one-line description.
st.title('PDF Question Answering App')
st.write('Ask questions about the content of the PDF document.')

# User input for questions; the `if question:` check below skips an empty value.
question = st.text_input('Enter your question:')

# Process the question and retrieve answer.
# Relies on `vector_store` having been created by the preceding cell; nothing
# here guards against it being undefined.
if question:
    retriever = Retriever(vector_store)
    answer = retriever.retrieve_answer(question)
    st.write('Answer:', answer)