In [None]:
# Install/upgrade the LangChain packages and bs4 used by this notebook into the kernel's environment.
%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4

In [None]:
!pip install bitsandbytes
!pip install accelerate

In [31]:
!pip install transformers
!pip install fastembed


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting fastembed
  Downloading fastembed-0.5.0-py3-none-any.whl.metadata (8.5 kB)
Collecting loguru<0.8.0,>=0.7.2 (from fastembed)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting mmh3<5.0.0,>=4.1.0 (from fastembed)
  Downloading mmh3-4.1.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (13 kB)
Collecting onnx>=1.15.0 (from fastembed)
  Downloading onnx-1.17.0-cp312-cp312-macosx_12_0_universal2.whl.metadata (16 kB)
Collecting pillow<11.0.0,>=10.3.0 (from fastembed)
  Using cached pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.2 kB)
Collecting py-rust-stemmers<0.2.0,>=0.1.0 (from fastembed)
  Downloading py_rust_stemmers-0.1.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (3.4 kB)
Downloading fastembed-0.5.0

1. indexing: load
DocumentLoaders를 사용하여 블로그 내용을 로드.
Document는 page_content(str)와 메타데이터(dict)를 포함하는 객체
이 경우 urllib을 사용하여 웹 URL에서 HTML을 로드하고, BeautifulSoup을 사용하여 텍스트로 구문 분석
bs_kwargs를 통해 BeautifulSoup 파서에 매개변수를 전달하여 HTML -> 텍스트 구문 분석을 사용자 정의할 수 있다.

2. indexing: split
로드된 문서가 너무 길면 모델이 한 번에 처리하기 어려우므로 Document를 청크 단위로 분할
문서를 재귀적으로 분할하는 RecursiveCharacterTextSplitter 이용

3. indexing: store
벡터데이터베이스에 저장
- openai의 embedding 방식이 가장 좋음
- 그 다음 fastembed

4. Retrieval and Generation: Retrieve
- 문서검색

5. Retrieval and Generation: Generate



In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import FastEmbedEmbeddings
import getpass 
import os
from dotenv import load_dotenv
from torch import cuda, bfloat16
import transformers

# LCEL Runnable 프로토콜 사용하여 체인을 정의하고 수행
# 스트리밍, 비동기 및 일괄 호출을 즉시 실행
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Load variables from a local .env file into the process environment
# (HUGGING_FACE_KEY is read below; presumably the OpenAI key lives there too).
load_dotenv()

# Chat model that the RAG chain at the end of this cell generates answers with.
llm = ChatOpenAI(model="gpt-3.5-turbo")

model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Target device string: the current CUDA device if one is available, else CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Quantization settings that reduce the model's GPU memory usage.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# Hugging Face access token; raises KeyError if HUGGING_FACE_KEY is not set.
hf_auth = os.environ["HUGGING_FACE_KEY"]

# `token=` replaces the deprecated `use_auth_token=` keyword of from_pretrained.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# NOTE(review): `model` is loaded here but the RAG chain below uses `llm`
# (ChatOpenAI); confirm whether this local model is still needed.
# NOTE(review): trust_remote_code=True allows repository code to run locally;
# confirm it is actually required for this checkpoint.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

# Keep only the post's title, header, and content when parsing the HTML.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

# Split the loaded documents into overlapping chunks; add_start_index=True
# asks the splitter to record where each chunk starts in its source document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

# Store the chunks in a Chroma vector store, embedded with FastEmbed.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=FastEmbedEmbeddings())
# Backward-compatible alias: the original (misspelled) name may still be referenced.
vectorsotre = vectorstore

# Sanity check: show the metadata of one of the chunks.
print(all_splits[10].metadata)

# Document retrieval: similarity search returning the 6 closest chunks.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
# Backward-compatible alias: the RAG chain below refers to the misspelled name.
# Previously only `retrieveer` was assigned, so `retriever.invoke(...)` raised NameError.
retrieveer = retriever
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")

print(retrieved_docs[0].page_content)

# Generate: fetch the RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values to preview the resulting message.
filler_inputs = {"context": "filler context", "question": "filler question"}
examples_messages = prompt.invoke(filler_inputs).to_messages()
print(examples_messages[0].content)


def format_docs(docs):
    """Join the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values concatenated in order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Inputs for the prompt: "context" is the retrieved documents formatted as one
# string, "question" is the chain's input passed through unchanged.
chain_inputs = {"context": retrieveer | format_docs, "question": RunnablePassthrough()}

# Pipe the inputs through the prompt, the chat model, and a plain-string parser.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()