In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document

In [2]:
# Small in-memory corpus for the retrieval demo. Several sentences cover the
# same topic (langchain, Chroma, MMR, embedding models), so the corpus contains
# near-duplicates for the retrievers to choose between.
# The sentences are kept verbatim: they are the data that gets embedded.
documents = [
    Document(page_content=sentence)
    for sentence in (
        "langchain helps developers build llm application easily",
        "Chroma is a vector database optimized for LLM-based search",
        "Embeddings convert text into high-dimensional vectors",
        "Hugging Face provides embedding models",
        "OpenAI provides embedding models",
        "FAISS is a library for efficient similarity search",
        "MMR stands for Maximum Marginal Relevance",
        "langchain makes easy to work with llms",
        "Chroma is a vector database",
        "langchain provides many different functions for llm aplications",
        "MMR helps you get diverse results when doing similarity search",
    )
]

In [None]:
# Embed every document and build the FAISS index in a single call.
# No model name is passed to HuggingFaceEmbeddings, so whatever model the
# library uses by default is the one that produces the vectors.
vector_store = FAISS.from_documents(documents, HuggingFaceEmbeddings())

In [27]:
# MMR retriever with lambda_mult = 1 (the "more relevant" end of the scale).
#   search_type "mmr" - this enables MMR
#   k                 - desired number of final results
#   fetch_k           - number of documents to fetch before MMR
#   lambda_mult       - relevance-diversity balance in the range 0-1:
#                       0 = very diverse answers, 1 = more relevant answers
retriver1 = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=1, fetch_k=10),
)

In [28]:
# MMR retriever with lambda_mult = 0 (the "very diverse" end of the scale).
#   search_type "mmr" - this enables MMR
#   k                 - desired number of final results
#   fetch_k           - number of documents to fetch before MMR
#   lambda_mult       - relevance-diversity balance in the range 0-1:
#                       0 = very diverse answers, 1 = more relevant answers
retriver2 = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0, fetch_k=10),
)

In [29]:
# Send the same question to both retrievers so their result lists can be
# compared side by side (retriver1 is invoked first, then retriver2).
query = "what is langchain?"
results1, results2 = [
    retriever.invoke(query) for retriever in (retriver1, retriver2)
]

In [32]:
# Results from retriver1 (lambda_mult = 1): the less diverse result set.
# Each hit is printed under a 1-based "Result N" header.
for position, hit in enumerate(results1, start=1):
    print(f"\n--- Result {position} ----")
    print(hit.page_content)


--- Result 1 ----
langchain provides many different functions for llm aplications

--- Result 2 ----
langchain makes easy to work with llms

--- Result 3 ----
langchain helps developers build llm application easily


In [33]:
# Results from retriver2 (lambda_mult = 0): the more diverse result set.
# Each hit is printed under a 1-based "Result N" header.
for position, hit in enumerate(results2, start=1):
    print(f"\n--- Result {position} ----")
    print(hit.page_content)


--- Result 1 ----
langchain provides many different functions for llm aplications

--- Result 2 ----
Hugging Face provides embedding models

--- Result 3 ----
MMR stands for Maximum Marginal Relevance
