`02-rag.ipynb`
```sh
pip install langchain langchain-chroma langchain-community langchain-openai langchain-text-splitters python-dotenv beautifulsoup4
```

In [1]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Load variables from a local .env file into the environment
# (presumably the OpenAI API key lives there — verify).
load_dotenv()
# Chat model handle; NOTE(review): `model` is not referenced again in the
# visible cells — the chain further down builds its own `llm`.
model = ChatOpenAI(model="gpt-3.5-turbo-1106")

In [2]:
from langchain_core.documents import Document

# Toy corpus: one sentence per pet, each tagged with the source document it
# belongs to. Built from (text, source) pairs so the data reads as a table.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "Dogs are great companions, known for their loyalty and friendliness.",
            "mammal-pets-doc",
        ),
        (
            "Cats are independent pets that often enjoy their own space.",
            "mammal-pets-doc",
        ),
        (
            "Goldfish are popular pets for beginners, requiring relatively simple care.",
            "fish-pets-doc",
        ),
        (
            "Parrots are intelligent birds capable of mimicking human speech.",
            "bird-pets-doc",
        ),
        (
            "Rabbits are social animals that need plenty of space to hop around.",
            "mammal-pets-doc",
        ),
    )
]

In [5]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed the pet documents and index them in an in-memory Chroma collection.
#
# The collection is named explicitly: without `collection_name`, in-memory
# Chroma stores created in the same kernel write to one shared default
# collection, so unrelated documents indexed by another unnamed store would
# show up in these searches.
#
# Stable ids make the cell idempotent: re-running it overwrites the same
# entries instead of appending duplicates under fresh random ids.
vectorstore = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
    ids=[f'pet-doc-{index}' for index in range(len(documents))],
    collection_name='pet-facts',
)

In [12]:
# Each hit comes back as a (Document, score) tuple. In the recorded output the
# scores ascend from the first hit, so they are presumably distances
# (lower = closer match) — TODO confirm against the Chroma docs.
vectorstore.similarity_search_with_score('not annoying')

[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.5027285218238831),
 (Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
  0.5148149132728577),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  0.5198458433151245),
 (Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
  0.5290757417678833)]

In [21]:
# Embed the query text by hand so the search can be run on the raw vector.
emb = OpenAIEmbeddings().embed_query('not annoying')

In [23]:
# Search with the precomputed embedding instead of the query string; the
# recorded output lists the same documents, in the same order, as the
# text-based search for 'not annoying'.
vectorstore.similarity_search_by_vector(emb)

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]

In [24]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

In [25]:
# Wrap the store's similarity_search as a Runnable with k bound to 1, so each
# query yields a single document.
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
# batch() runs the retriever once per input string and returns one result
# list per query, in input order.
retriever.batch(['cat', 'shark'])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]]

In [26]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Chat model that produces the final answer.
llm = ChatOpenAI(model='gpt-3.5-turbo-1106')

# Prompt body: the model is told to answer from the retrieved context only.
# `{question}` and `{context}` are filled in by the chain below.
message = '''
Answer this question using the provided context only.

{question}

Context:
{context}
'''

# Each prompt message must be a (role, template) tuple. Passing the bare list
# ['human', message] is read as two separate human messages — the literal
# text "human" followed by the template — which sends a stray extra message
# to the model on every call.
prompt = ChatPromptTemplate.from_messages([('human', message)])

# The dict fans the input string out: `retriever` turns it into the context
# documents while RunnablePassthrough forwards it unchanged as the question.
# The filled prompt is then sent to the chat model.
rag_chain = {'context': retriever, 'question': RunnablePassthrough()} | prompt | llm

In [34]:
# Run the whole chain on one question and keep the model's reply object so
# its fields can be inspected in later cells.
res = rag_chain.invoke('tell me about cat and parrot')

In [35]:
# Show only the text of the model's reply.
res.content

"I'm sorry, I cannot provide an accurate answer to your question as the context only mentions parrots and does not provide any information about cats. If you have specific questions about parrots, I would be happy to help."

## RAG

In [64]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Load variables from a local .env file into the environment
# (presumably the OpenAI API key lives there — verify).
load_dotenv()
# Chat model used as the answer step of the RAG chain built in this section.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

In [65]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [73]:
# Load a single namu.wiki article, parsing only the elements whose CSS class
# is 'KxP6Q+Qe'.
# NOTE(review): that class name looks auto-generated and may change when the
# site is redeployed — confirm it still selects the article body.
loader = WebBaseLoader(
    web_paths=('https://namu.wiki/w/%ED%8C%90%ED%83%80%EC%A7%80%EB%A5%BC%20%EC%97%AC%ED%96%89%ED%95%98%EB%8A%94%20%ED%98%84%EB%8C%80%EC%9D%B8%EC%9D%84%20%EC%9C%84%ED%95%9C%20%EC%95%88%EB%82%B4%EC%84%9C',),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=('KxP6Q+Qe',)
        )
    )
)

docs = loader.load()

# Fail fast when the filter matched nothing (layout change, blocked request):
# otherwise the following cells would split and index an empty page, and the
# chain would answer without any real context.
if not any(doc.page_content.strip() for doc in docs):
    raise ValueError(
        'WebBaseLoader returned no text; the page layout or the CSS class '
        'filter may have changed, or the request may have been blocked.'
    )

In [74]:
# Split the loaded documents into chunks before turning their contents into vectors.
# Chunks are capped at chunk_size=1000 and consecutive chunks share up to
# chunk_overlap=200 (presumably measured in characters — confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [75]:
# Embed every chunk and index it in a dedicated Chroma collection.
# The collection is named explicitly: without `collection_name`, in-memory
# Chroma stores created in the same kernel write to one shared default
# collection, so documents indexed by any other unnamed store would be
# retrieved as context for questions about this article.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name='namuwiki-fantasy-guide',
)

In [76]:
# Expose the vector store as a retriever (default search settings) so it can
# be piped into the chain.
retriever = vectorstore.as_retriever()

In [77]:
# Fetch the 'rlm/rag-prompt' template from the LangChain Hub (needs network
# access). The chain feeds it the 'context' and 'question' inputs.
prompt = hub.pull('rlm/rag-prompt')

In [78]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = '\n\n'
    return separator.join(contents)

# Fan the incoming question out: one branch retrieves documents and flattens
# them to text, the other forwards the question unchanged.
rag_inputs = {
    'context': retriever | format_docs,
    'question': RunnablePassthrough(),
}

# Filled prompt -> chat model -> plain string answer.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [79]:
# Ask the chain a question in Korean ("What should you do first if you end up
# in a fantasy world?") and print the answer string.
print(rag_chain.invoke('판타지 세계에 떨어지면 뭘 제일 먼저 해야함?'))

판타지 세계에 떨어졌을 때는 일단 안전을 확인하고 도움을 요청해야 합니다. 그리고 그 세계에 적응하기 위해 필요한 지식과 기술을 습득해야 합니다. 마지막으로는 그 세계의 규칙과 법률을 숙지하여 존재할 수 있도록 조치해야 합니다.


In [None]:
# NOTE(review): never-executed cell holding a pasted copy of the answer
# printed by the previous cell; it only echoes this string if run.
'''
판타지 세계에 떨어졌을 때는 일단 안전을 확인하고 도움을 요청해야 합니다. 
그리고 그 세계에 적응하기 위해 필요한 지식과 기술을 습득해야 합니다. 
마지막으로는 그 세계의 규칙과 법률을 숙지하여 존재할 수 있도록 조치해야 합니다.
'''