# RAG Demo: Retrieval-Augmented Generation with Local Documents
This notebook demonstrates how to enhance an LLM with external knowledge using document retrieval.

In [2]:
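# Install dependencies (Colab users: uncomment the %pip line below).
# If the import cell fails with "cannot import name 'cached_download' from 'huggingface_hub'",
# the installed sentence-transformers is likely an older release (2.2.x) that still relies on
# that removed helper; upgrading sentence-transformers resolves it.
# %pip install -q -U transformers faiss-cpu sentence-transformers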

In [7]:
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import faiss
import numpy as np

ImportError: cannot import name 'cached_download' from 'huggingface_hub' (c:\Proj\RAG\rag-demo-env\Lib\site-packages\huggingface_hub\__init__.py)

In [None]:
# Knowledge base: a tiny in-memory corpus of product and support snippets.
docs = [
    "ProductX is the latest widget released in 2024. It features improved battery life.",
    "To reset ProductX, hold the power button for 10 seconds until the LED blinks.",
    "Our support plans include Basic, Plus, and Enterprise tiers, offering 24/7 support in higher tiers.",
]

# One identifier per document, in corpus order: 'doc0', 'doc1', ...
doc_ids = ['doc' + str(position) for position, _ in enumerate(docs)]

In [None]:
# Sentence-embedding model; the same instance is reused later to embed the query.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed the whole knowledge base in a single call, keeping the result as a NumPy array.
doc_embeddings = embed_model.encode(
    docs,
    convert_to_numpy=True,
)

In [None]:
# Build an exact (brute-force) L2 FAISS index over the document embeddings.
# FAISS's Python API expects a C-contiguous float32 matrix; coerce explicitly so a
# different embedding dtype does not fail inside `add`. No copy is made when the
# array already conforms.
index_vectors = np.ascontiguousarray(doc_embeddings, dtype=np.float32)

dim = index_vectors.shape[1]  # embedding width = number of columns
index = faiss.IndexFlatL2(dim)
index.add(index_vectors)

# Sanity check: every document must be searchable.
assert index.ntotal == len(docs), "FAISS index size does not match the number of documents"

In [None]:
# Generator: an encoder-decoder (seq2seq) checkpoint wrapped in a text2text pipeline.
model_name = 'google/flan-t5-base'

# Model weights and tokenizer come from the same checkpoint name.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

qa_pipeline = pipeline(
    task='text2text-generation',
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Query input
query = "How can I reset ProductX?"
k = 1  # number of passages to retrieve; raise it to give the generator more context

# Embed the query with the same model that embedded the documents.
query_vec = embed_model.encode(query, convert_to_numpy=True)

# FAISS searches over a 2-D float32 batch, so wrap the single query vector as (1, dim).
query_batch = np.asarray(query_vec, dtype=np.float32).reshape(1, -1)
distances, indices = index.search(query_batch, k)

# FAISS pads the result with -1 when fewer than k neighbours exist; drop those so a
# padded slot never turns into docs[-1] (the last document) by accident.
hit_positions = [int(pos) for pos in indices[0] if pos >= 0]

# Use every retrieved passage (nearest first). With k = 1 this is exactly the top hit.
retrieved_text = "\n".join(docs[pos] for pos in hit_positions)
print(f'Retrieved: {retrieved_text}')

In [None]:
# Ground the generator: retrieved context first, then the user's question.
prompt = f"Context: {retrieved_text}\nQuestion: {query}\nAnswer:"

# The pipeline returns a list of generations; take the text of the first one.
generations = qa_pipeline(prompt, max_length=100)
result = generations[0]['generated_text']

print(f'Answer: {result}')