In [None]:
"""
A complete step-by-step guide for setting up a RAG implementation in VSCode from scratch:
    While not strictly necessary, using a virtual environment is highly recommended to avoid package conflicts:
"""

bash

# Create a new project directory
mkdir rag_project
cd rag_project

# Create a virtual environment
python -m venv venv

# Activate the virtual environment
# On Windows:
venv\Scripts\activate
# On macOS/Linux:
source venv/bin/activate

# With your virtual environment activated, install the necessary packages:
pip install langchain
pip install langchain-community
pip install langchain-openai
pip install openai
pip install faiss-cpu
pip install pypdf
pip install python-dotenv  # provides the `dotenv` module that rag.py imports

# Create a .env file in your project directory:
touch .env

# Add your OpenAI API key to the .env file:
OPENAI_API_KEY=your-api-key-here

# Create a new file rag.py and add the basic RAG implementation shown in the next cell.
# Then select your Python interpreter in VSCode:
Press Cmd+Shift+P (Mac) or Ctrl+Shift+P (Windows/Linux)
Type "Python: Select Interpreter"
Choose the interpreter from your virtual environment (the venv folder created earlier)

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Load environment variables from a .env file into the process environment.
# This must run before any OpenAI client is created so the key is available.
load_dotenv()

# OPENAI_API_KEY is now read from the environment rather than hard-coded here.
class RAGSystem:
    """Retrieval-augmented question answering over a single PDF.

    Creating an instance immediately loads the PDF, embeds the loaded
    documents into a FAISS vector store and builds a ``RetrievalQA`` chain
    on top of it. Both the embedding step and later questions use OpenAI
    models, so ``OPENAI_API_KEY`` is expected to be set in the environment.

    Attributes set during construction:
        pdf_path: Path of the PDF that was indexed.
        documents: Documents returned by ``PyPDFLoader.load()``.
        vector_store: FAISS store built from ``documents``.
        qa_chain: ``RetrievalQA`` chain used by :meth:`ask_question`.
    """

    def __init__(self, pdf_path: str) -> None:
        """Load ``pdf_path`` and build the retrieval pipeline.

        Raises:
            FileNotFoundError: If ``pdf_path`` is not an existing file.
            ValueError: If no documents could be loaded from the PDF.
        """
        self.pdf_path = pdf_path
        self.load_documents()
        self.setup_retrieval_system()

    def load_documents(self) -> None:
        """Load the PDF at ``self.pdf_path`` into ``self.documents``.

        Raises:
            FileNotFoundError: If ``self.pdf_path`` does not point to an
                existing regular file (missing path or a directory).
        """
        # isfile rather than exists: a directory would pass an existence
        # check and only fail later inside the loader with a less clear error.
        if not os.path.isfile(self.pdf_path):
            raise FileNotFoundError(f"PDF file not found at {self.pdf_path}")

        loader = PyPDFLoader(self.pdf_path)
        self.documents = loader.load()

    def setup_retrieval_system(self) -> None:
        """Build the vector store and the QA chain from ``self.documents``.

        Raises:
            ValueError: If ``self.documents`` is empty; an index cannot be
                built from zero documents.
        """
        # Fail early with a clear message instead of letting the vector
        # store constructor fail on an empty embedding list.
        if not self.documents:
            raise ValueError(
                f"No documents were loaded from {self.pdf_path}; "
                "cannot build the vector store"
            )

        embeddings = OpenAIEmbeddings()
        self.vector_store = FAISS.from_documents(self.documents, embeddings)
        retriever = self.vector_store.as_retriever()

        # temperature=0 keeps answers as deterministic as the model allows.
        llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
        # "stuff" places all retrieved documents into a single prompt.
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever
        )

    def ask_question(self, query: str):
        """Run ``query`` through the QA chain and return the chain's output.

        The value is returned exactly as produced by ``qa_chain.invoke``.
        NOTE(review): for RetrievalQA this is expected to be a dict with the
        answer text under the "result" key - confirm against the installed
        LangChain version.
        """
        response = self.qa_chain.invoke({"query": query})
        return response

    def save_vector_store(self, path: str = "vector_store.index") -> None:
        """Persist the FAISS store to ``path`` and print a confirmation.

        NOTE(review): ``save_local`` is documented as taking a folder path,
        so despite the ".index" suffix of the default, ``path`` is presumably
        created as a directory - confirm against the LangChain FAISS docs.
        """
        self.vector_store.save_local(path)
        print(f"Vector store saved to {path}")

def main():
    """Run a small end-to-end demo: index a PDF, ask a question, save the index."""
    # Initialize the RAG system with your PDF
    pdf_path = "your_pdf_path.pdf"  # Replace with your PDF path
    rag = RAGSystem(pdf_path)

    # Example question
    question = "What does the document say about fertility?"
    response = rag.ask_question(question)

    # The chain returns a mapping that also echoes the query; show only the
    # answer text when it is present, otherwise fall back to the raw value.
    if isinstance(response, dict):
        answer = response.get("result", response)
    else:
        answer = response

    print(f"Question: {question}")
    print(f"Response: {answer}")

    # Save the vector store
    rag.save_vector_store()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()