 # Implementing a Simple RAG Pipeline


Install the required libraries.

In [None]:
!pip install langchain_community



Collecting langchain_community
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.2.16-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (

In [None]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


Import the required libraries.

In [None]:
import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate

Scrape the website content. Alternatively, a custom dataset or PDF data could be used as the source.

In [None]:

# Web scraping function
def scrape_website(url, timeout=30):
    """Download a web page and return the text of every <p> element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block forever on an unresponsive host.

    Returns:
        A list of strings, one per ``<p>`` tag, in document order. The text is
        returned as-is (not stripped or filtered).

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never silently scraped and indexed.
        requests.exceptions.RequestException: On connection problems/timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing the body of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return [paragraph.get_text() for paragraph in soup.find_all('p')]

# Clean scraped content
def clean_content(content_list, unwanted_items=None):
    """Drop empty and boilerplate strings from scraped paragraph text.

    Each entry is stripped of surrounding whitespace before it is checked, so
    whitespace-only paragraphs are removed and boilerplate such as
    ``' Follow\\n'`` is recognised even when it carries stray whitespace.

    Args:
        content_list: Iterable of strings (e.g. scraped paragraph texts).
            ``None`` is treated as an empty list.
        unwanted_items: Optional collection of exact strings to discard.
            Defaults to the UI fragments hard-coded below.

    Returns:
        A new list with the surviving entries, stripped, in their original order.
    """
    if unwanted_items is None:
        unwanted_items = {'Sign up', 'Sign in', 'Follow', '--', '15', 'Listen'}

    cleaned = []
    for text in content_list or []:
        # Normalise first so the emptiness and boilerplate checks see the
        # same string that will be returned.
        stripped = text.strip() if text else ''
        if stripped and stripped not in unwanted_items:
            cleaned.append(stripped)
    return cleaned

# Scrape and clean website content
url = 'https://medium.com/@akriti.upadhyay/implementing-rag-with-langchain-and-hugging-face-28e3ea66c5f7'
web_content = scrape_website(url)
cleaned_content = clean_content(web_content)

# Fail fast when nothing usable was scraped (e.g. the site served a page
# without article paragraphs). Otherwise the problem would presumably only
# surface later, as a less obvious error when the vector store is built.
if not cleaned_content:
    raise ValueError(f"No usable paragraph text was scraped from {url}")

# Convert cleaned content to Documents (one Document per paragraph)
documents = [Document(page_content=text) for text in cleaned_content]


Split the documents into chunks and create the embeddings.

In [None]:

# Tunable settings for chunking and embedding
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Break the scraped documents into chunks of at most CHUNK_SIZE characters
text_splitter = CharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)
texts = text_splitter.split_documents(documents)

# Embed every chunk and index the vectors in a FAISS store
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vectorstore = FAISS.from_documents(documents=texts, embedding=embeddings)




Use the gpt2-large model for better generation.

In [None]:

# Set up a more capable language model.
# The tokenizer and the causal language model are both loaded from the same
# checkpoint name, so they are used together in the generation pipeline.
# NOTE(review): from_pretrained presumably downloads the checkpoint on first
# use - confirm network access / cache availability before a full re-run.
model_name = "gpt2-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)




Create the text-generation pipeline, limited to 50 new tokens.

In [None]:
# Create a text-generation pipeline with adjusted parameters
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,
    temperature=0.3,
    top_p=0.95,
    repetition_penalty=1.2,
    do_sample=True,
    # Return only the newly generated tokens. By default the pipeline returns
    # prompt + completion, so the "answer" handed back by the QA chain starts
    # with the echoed "Context: ..." prompt instead of the model's reply.
    return_full_text=False,
    # GPT-2 has no dedicated padding token; set it explicitly to the EOS token
    # instead of relying on the fallback that logs
    # "Setting `pad_token_id` to `eos_token_id`" on every call.
    pad_token_id=tokenizer.eos_token_id,
)

# Create a HuggingFacePipeline language model (LangChain wrapper around `pipe`)
local_llm = HuggingFacePipeline(pipeline=pipe)


Create a prompt template to get better output for our query.

In [None]:

# Prompt text sent to the language model; {context} and {question} are the
# two placeholders declared as input variables below.
template = """Context: {context}

Question: {question}

Answer the question concisely in one sentence based only on the given context:"""

PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Retrieval QA chain: the vector store supplies the context, the local LLM
# produces the answer, and PROMPT controls how both are presented.
qa = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT),
    llm=local_llm,
)


In [None]:

# Simplified RAG Pipeline function
def rag_pipeline(query, chain=None, answer_marker=None):
    """Run a query through the retrieval QA chain and return a one-sentence answer.

    The raw chain output may begin with the echoed prompt (context, question
    and instruction line). Everything up to and including ``answer_marker`` is
    discarded first, so the sentence returned is taken from the model's reply
    rather than from the retrieved context.

    Args:
        query: The user question.
        chain: Object exposing ``run(query) -> str``. Defaults to the
            notebook-level ``qa`` chain.
        answer_marker: Text that separates the prompt from the answer.
            Defaults to the last line of ``PROMPT.template``. If the marker
            does not occur in the response, the whole response is used.

    Returns:
        The first sentence of the answer, always ending in ``.``, ``!`` or
        ``?``; an empty string when the model produced no answer text.
    """
    import re  # local import keeps this cell self-contained

    if chain is None:
        chain = qa
    if answer_marker is None:
        answer_marker = PROMPT.template.strip().splitlines()[-1]

    response = chain.run(query)

    # Drop the echoed prompt, if present, and keep only what follows it.
    answer = response
    if answer_marker and answer_marker in response:
        answer = response.split(answer_marker, 1)[1]
    answer = answer.strip()
    if not answer:
        return ""

    # Simple post-processing: take the first sentence. Split only where
    # sentence punctuation is followed by whitespace, so decimals such as
    # "3.5" do not cut the sentence short.
    first_sentence = re.split(r"(?<=[.!?])\s+", answer, maxsplit=1)[0]
    if first_sentence[-1] not in ".!?":
        # Generation may stop mid-sentence; terminate it as the original did.
        first_sentence += "."
    return first_sentence


Generating responses

In [None]:

# Example usage: ask one question and show it next to the pipeline's answer
query = "What embedding is captured?"
result = rag_pipeline(query)
print(f"Query: {query}", f"RAG Response: {result}", sep="\n")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: What embedding is captured?
RAG Response: Context: Embeddings capture the semantic meaning of the text which allows you to quickly and efficiently find other pieces of text which are similar.
