<a href="https://colab.research.google.com/github/vedantpople4/LLM/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers faiss-cpu sentence-transformers torch

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m64.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [2]:
import faiss
from sentence_transformers import SentenceTransformer

class Retriever:
    def __init__(self, documents):
        self.documents = documents
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = self._create_index()

    def _create_index(self):
        embeddings = self.model.encode(self.documents)
        dimension = embeddings.shape[1]
        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings)
        return index

    def retrieve(self, query, k=3):
        query_embedding = self.model.encode([query])
        _, indices = self.index.search(query_embedding, k)
        return [self.documents[i] for i in indices[0]]

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

class Generator:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained("gpt2")

    def generate(self, prompt, max_length=100):
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.model.generate(**inputs, max_length=max_length)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

In [5]:
class RAG:
    def __init__(self, documents):
        self.retriever = Retriever(documents)
        self.generator = Generator()

    def answer(self, query):
        retrieved_docs = self.retriever.retrieve(query)
        context = " ".join(retrieved_docs)
        prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
        return self.generator.generate(prompt)

documents = [
    "The capital of France is Paris.",
    "The Eiffel Tower is located in Paris.",
    "Paris is known as the City of Light.",
]

rag = RAG(documents)
question = "What is the capital of France?"
answer = rag.answer(question)
print(answer)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Context: The capital of France is Paris. Paris is known as the City of Light. The Eiffel Tower is located in Paris.

Question: What is the capital of France?

Answer: The capital of France is Paris. Paris is known as the City of Light. The Eiffel Tower is located in Paris.

Question: What is the capital of France?

Answer: The capital of France is Paris. Paris is known as the City of Light.
