# RAG Example

To start, let's get GPT and its tokenizer set up

In [5]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

gpt_model = GPT2LMHeadModel.from_pretrained('gpt2')
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')


Now, set up an embedding model that will encode our content for the vector database. We will use FAISS to start.

In [9]:
from sentence_transformers import SentenceTransformer

# sample documents
documents = [
    "RAG combines retrieval-based methods with generation-based models for improved text generation.",
    "It retrieves relevant information from a large corpus to enhance the generation process.",
    "By using FAISS, we efficiently search over the document embeddings.",
    "GPT models are commonly used for generating natural language responses.",
    "Sentence-Transformers generate high-quality document embeddings."
]

# convert documents into embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents)


Now we will add the documents to FAISS' index

In [10]:
import faiss
import numpy as np

d = embeddings.shape[1]  # dimension of embeddings
index = faiss.IndexFlatL2(d)  # L2 distance metric
index.add(np.array(embeddings))  # add embeddings to the index


We are now ready to ask the LLM questions.
This happens in the following steps:
# Ask our question ('query')
# Create an embedding for our query. This will allow the vector database to 

In [4]:
query_embedding = model.encode(["What is RAG?"])[0]
distances, indices = index.search(np.array([query_embedding]), k=5)  # Retrieve top 5


In [6]:
gpt_model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [8]:
indices

array([[ 0,  1, -1, -1, -1]])