In [1]:
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# Get the value of the environment variable
openai_api_key = os.getenv("OPENAI_API_KEY")

# **Web Base Loader**

In [2]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()

In [3]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the web page content
loader = WebBaseLoader("https://www.theguardian.com/us")

document = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

vectorstore = Chroma.from_documents(
    documents=document,
    embedding=embeddings,
    persist_directory="chroma_store"
)

In [5]:
from langchain.chains import RetrievalQA

retriever = vectorstore.as_retriever()


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

In [8]:
response=qa_chain.invoke("What is the latest news of Guardian?")
print(response)

{'query': 'What is the latest news of Guardian?', 'result': "The latest news from The Guardian includes stories about Donald Trump's Memorial Day speech, a car ploughing into a crowd at a Liverpool FC victory parade, Russia targeting Ukraine with drone strikes, and more.", 'source_documents': [Document(metadata={'description': "Latest US news, world news, sports, business, opinion, analysis and reviews from the Guardian, the world's leading liberal voice", 'language': 'en', 'source': 'https://www.theguardian.com/us', 'title': 'Latest news, sport and opinion from the Guardian'}, page_content="\n\n\n\n\nLatest news, sport and opinion from the Guardian\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to main contentSkip to navigationPrint subscriptionsSearch jobs Sign inCrimeThe terrifying rise of secret camerasWildlifeThe last-ditch race to save the Orinoco crocodileGallery26The long readAn ancient skull, a di

# **WikiPediaRetriever**

In [9]:
from langchain.retrievers import WikipediaRetriever

retriever = WikipediaRetriever()

result = retriever.invoke("Python programming language")

for doc in result:
    print(f"Title: {doc.metadata['title']}")
    print(f"Content: {doc.page_content[:200]}...")  # Print first 200 characters of content
    print("-" * 80)  # Separator for readability

Title: Python (programming language)
Content: Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.
Python is dynamically type-checked and garbage-...
--------------------------------------------------------------------------------
Title: History of Python
Content: The programming language Python was conceived in the late 1980s, and its implementation was started in December 1989 by Guido van Rossum at CWI in the Netherlands as a successor to ABC capable of exce...
--------------------------------------------------------------------------------
Title: Mojo (programming language)
Content: Mojo is a programming language in the Python family that is currently under development. It is available both in browsers via Jupyter notebooks, and locally on Linux and macOS. Mojo aims to combine th...
--------------------------------------------------------------------------------


# **Multi Query Retriever**

In [11]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.schema import Document
from langchain.retrievers import  MultiQueryRetriever

In [12]:
llm = ChatOpenAI()

In [13]:
docs = [
    Document(page_content="langchain is a framework for building applications with LLMs"),
    Document(page_content="langchain enables the development of applications that can reason, plan, and execute tasks"),
    Document(page_content="langchain is a powerful tool for building AI applications"),
    Document(page_content="langchain provides a simple interface for working with LLMs and other AI models"),
    Document(page_content="langchain is designed to be flexible and extensible, allowing developers to create custom applications"),
    Document(page_content="langchain supports a wide range of AI models and frameworks, making it easy to integrate with existing systems"),
    Document(page_content="langchain is an open-source project that encourages collaboration and community contributions"),
    Document(page_content="langchain is a versatile framework that can be used for various AI applications, including chatbots, virtual assistants, and more"),
    Document(page_content="langchain is a rapidly evolving framework with a growing ecosystem of tools and resources"),
]

In [14]:
embeddings = OpenAIEmbeddings()

vectorstore = FAISS.from_documents(docs, embeddings)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x246473057b0>

In [17]:
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2})
)

In [18]:
query = "Explain langchain in one sentence"
result = multi_query_retriever.invoke(query)

In [19]:
for i, doc in enumerate(result):
    print(f"Result {i + 1}: {doc.page_content}")
    print("-" * 80)  # Separator for readability

Result 1: langchain is a framework for building applications with LLMs
--------------------------------------------------------------------------------
Result 2: langchain is an open-source project that encourages collaboration and community contributions
--------------------------------------------------------------------------------
Result 3: langchain provides a simple interface for working with LLMs and other AI models
--------------------------------------------------------------------------------
Result 4: langchain is a powerful tool for building AI applications
--------------------------------------------------------------------------------


# **Contxtual Retriever**

In [20]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

from langchain.vectorstores import FAISS
from langchain.schema import Document

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [21]:
llm = ChatOpenAI(temperature=0)
embeddings = OpenAIEmbeddings()

In [22]:
documents = [
    Document(page_content="LangChain allows you to build applications using LLMs like GPT-4."),
    Document(page_content="It supports memory, chains, tools, agents, and retrievers."),
    Document(page_content="FAISS is a library used for vector similarity search."),
    Document(page_content="LangChain is highly modular and supports plug-and-play components."),
    Document(page_content="Chroma is another vector store like FAISS, used for storing embeddings.")
]

In [23]:
vectorstore = FAISS.from_documents(documents, embeddings)

In [24]:
base_retriever= vectorstore.as_retriever(search_kwargs={"k": 4})

In [25]:
base_retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000024647307910>, search_kwargs={'k': 4})

In [26]:
compressor= LLMChainExtractor.from_llm(llm=llm)

In [27]:
compression_retriever= ContextualCompressionRetriever(
   base_retriever=base_retriever,
   base_compressor=compressor
)

In [28]:
results =compression_retriever.invoke("What does langchain enable?")

In [29]:
results

[Document(metadata={}, page_content='LangChain allows you to build applications using LLMs like GPT-4.'),
 Document(metadata={}, page_content='LangChain is highly modular and supports plug-and-play components.'),
 Document(metadata={}, page_content='It supports memory, chains, tools, agents, and retrievers.')]

In [30]:
for i, doc in enumerate(results):
    print(f"[Doc {i+1}] {doc.page_content}\n")

[Doc 1] LangChain allows you to build applications using LLMs like GPT-4.

[Doc 2] LangChain is highly modular and supports plug-and-play components.

[Doc 3] It supports memory, chains, tools, agents, and retrievers.



In [31]:
print("The End")

The End
