In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI / LangSmith credentials
# used by later cells come from — confirm against the project's .env.
load_dotenv()

True

In [2]:
from langchain_teddynote import logging

# Start LangSmith tracing for this notebook under the project name
# "CH14-Retriever" (the helper prints a confirmation with the project name).
logging.langsmith("CH14-Retriever")

LangSmith 추적을 시작합니다.
[프로젝트명]
CH14-Retriever


# EnsembleRetriever

1. 개요
    - 여러 검색기를 결합하여 더 나은 검색 결과를 제공 

2. 주요 구성 
    - 하이브리드 검색   
        - Sparse Retriever + Dense Retriever 결합     
            - Sparse : 키워드 기반 검색에 효과적 
            - Dense : 의미 유사성 기반 검색에 효과적 
    - 결과 재순위화 

[Reference] https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html

## Hybrid 
Sparse + Dense Retriever 

In [3]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [4]:
# Sample corpus: six short sentences that all mention "apple", mixing the
# fruit sense ("I like apples") with the company / product sense, so that
# keyword-based and embedding-based retrieval can disagree on the best match.
doc_list = [
    "I like apples",
    "I like apple company",
    "I like apple's iphone",
    "Apple is my favorite company",
    "I like apple's ipad",
    "I like apple's macbook",
]

In [5]:
# Sparse (keyword-based) retriever: BM25 over the sample texts.
# `k=1` limits it to a single result per query.
bm25_retriever = BM25Retriever.from_texts(doc_list, k=1)

# Dense (semantic) retriever: embed the same texts with OpenAI embeddings,
# index them in FAISS, and return a single nearest neighbour per query.
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(texts=doc_list, embedding=embedding)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 1})

# Ensemble retriever combining both of the above.
# `weights` pairs positionally with `retrievers`: 0.7 for BM25, 0.3 for FAISS.
ensemble_retriever = EnsembleRetriever(
    weights=[0.7, 0.3],
    retrievers=[bm25_retriever, faiss_retriever],
)

In [6]:
# Run the same query through the ensemble and through each underlying
# retriever so the three result sets can be compared side by side.
query = "my favorite fruit is apple"

ensemble_result = ensemble_retriever.invoke(query)
bm25_result = bm25_retriever.invoke(query)
faiss_result = faiss_retriever.invoke(query)

# Print every result set under its own header; each hit is followed by a
# blank line.
for header, results in (
    ("[Ensemble Retriever]", ensemble_result),
    ("[BM25 Retriever]", bm25_result),
    ("[FAISS Retriever]", faiss_result),
):
    print(header)
    for doc in results:
        print(f"Content: {doc.page_content}", end="\n\n")

[Ensemble Retriever]
Content: Apple is my favorite company

Content: I like apples

[BM25 Retriever]
Content: Apple is my favorite company

[FAISS Retriever]
Content: I like apples



In [7]:
# Second comparison: a query about the company / product sense of "apple",
# again sent to the ensemble and to each underlying retriever.
query = "Apple company makes my favorite iphone"

ensemble_result = ensemble_retriever.invoke(query)
bm25_result = bm25_retriever.invoke(query)
faiss_result = faiss_retriever.invoke(query)

# Print every result set under its own header; each hit is followed by a
# blank line.
for header, results in (
    ("[Ensemble Retriever]", ensemble_result),
    ("[BM25 Retriever]", bm25_result),
    ("[FAISS Retriever]", faiss_result),
):
    print(header)
    for doc in results:
        print(f"Content: {doc.page_content}", end="\n\n")

[Ensemble Retriever]
Content: Apple is my favorite company

Content: I like apple's iphone

[BM25 Retriever]
Content: Apple is my favorite company

[FAISS Retriever]
Content: I like apple's iphone



## Config 적용 

`ConfigurableField` 클래스를 사용하여 앙상블 가중치 변경 가능

In [8]:
from langchain_core.runnables import ConfigurableField

In [9]:
# Describe `weights` as a runtime-configurable field. Callers override it per
# invocation through config["configurable"]["ensemble_weights"].
ensemble_weights_field = ConfigurableField(
    id="ensemble_weights",
    name="Ensemble Weights",
    description="Ensemble Weights",
)

# Rebuild the ensemble (rebinding `ensemble_retriever`) without fixed weights
# and expose `weights` through the field declared above.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
).configurable_fields(weights=ensemble_weights_field)

In [10]:
# Per-call override: all weight on the first retriever (BM25), none on FAISS.
config = {
    "configurable": {
        "ensemble_weights": [1, 0],  # order matches [bm25_retriever, faiss_retriever]
    },
}

docs = ensemble_retriever.invoke("my favorite fruit is apple", config=config)
docs

[Document(page_content='Apple is my favorite company'),
 Document(page_content='I like apples')]

In [11]:
# Per-call override: all weight on the second retriever (FAISS), none on BM25.
config = {
    "configurable": {
        "ensemble_weights": [0, 1],  # order matches [bm25_retriever, faiss_retriever]
    },
}

docs = ensemble_retriever.invoke("my favorite fruit is apple", config=config)
docs

[Document(page_content='I like apples'),
 Document(page_content='Apple is my favorite company')]

-----
** End of Documents **