In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")

In [2]:
from openai import OpenAI

client = OpenAI(api_key=api_key)

In [3]:
def get_response(prompt) : 
    response = client.chat.completions.create(
        model = "gpt-3.5-turbo",
        messages = [{"role" : "user", "content" : prompt},],
    )
    return response.choices[0].message.content.strip()

In [4]:
response = get_response("안녕! 넌 누구야?")
response

'안녕하세요! 저는 인공지능 텍스트 챗봇입니다. 무엇을 도와드릴까요?'

In [5]:
from llama_index.llms.openai import OpenAI 
from llama_index.core.llms import ChatMessage

client_llama = OpenAI(model="gpt-3.5-turbo")

In [6]:
message = [ChatMessage(role="user", content="안녕! 넌 누구야?")]

In [7]:
response = client_llama.chat(message)
response

ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='안녕하세요! 저는 인공지능 챗봇입니다. 무엇을 도와드릴까요?', additional_kwargs={}), raw={'id': 'chatcmpl-9Dr6wfV9waJMvh4KuMtzhsqXtMUYy', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='안녕하세요! 저는 인공지능 챗봇입니다. 무엇을 도와드릴까요?', role='assistant', function_call=None, tool_calls=None))], 'created': 1713089450, 'model': 'gpt-3.5-turbo-0125', 'object': 'chat.completion', 'system_fingerprint': 'fp_c2295e73ad', 'usage': CompletionUsage(completion_tokens=36, prompt_tokens=20, total_tokens=56)}, delta=None, logprobs=None, additional_kwargs={})

In [8]:
response.message.content

'안녕하세요! 저는 인공지능 챗봇입니다. 무엇을 도와드릴까요?'

In [9]:
# outdated knowledge

query = "OpenAI의 sora 모델에 대해서 설명해줘"

resp = get_response(query)
print(resp)

OpenAI의 sora 모델은 이미지 캡셔닝 및 시각적 탐색 작업에서 좋은 성능을 보여주는 모델입니다. sora 모델은 최신 시각적 인식 모델들과 비교하여 높은 성능을 보이며, 특히 자연어 처리와 시각적 인식 작업을 결합한 멀티모달 작업에서 우수한 성과를 거두고 있습니다.

sora 모델은 주로 이미지 캡셔닝(이미지에 대한 설명 생성) 및 시각적 탐색(이미지에서 특정 객체를 찾는 작업)과 같은 작업에 활용되고 있습니다. 이 모델은 다양한 시각적 데이터셋에서 훈련되어 다양한 시각적 작업을 수행하는 데 효과적입니다.

또한, sora 모델은 Transfer Learning을 활용하여 다양한 도메인에서도 좋은 성능을 보이고 있습니다. 이는 새로운 작업이나 데이터셋에 대해 적은 양의 레이블된 데이터만 사용하여도 뛰어난 결과를 얻을 수 있다는 것을 의미합니다.

종합적으로, OpenAI의 sora 모델은 이미지 캡셔닝 및 시각적 탐색을 포함한 다양한 시각적 작업에 효과적으로 활용될 수 있는 고성능의 모델로 평가되고 있습니다.


In [10]:
# context

prompt = f"""
Utilizing the given context, please answer the question.

[context]
Sora is a text-to-video model that can generate videos based on short descriptive prompts[195] as well as extend existing videos forwards or backwards in time.[196] It can generate videos with resolution up to 1920x1080 or 1080x1920. The maximal length of generated videos is unknown.
Sora's development team named it after the Japanese word for "sky", to signify its "limitless creative potential".[195] Sora's technology is an adaptation of the technology behind the DALL·E 3 text-to-image model.[197] OpenAI trained the system using publicly-available videos as well as copyrighted videos licensed for that purpose, but did not reveal the number or the exact sources of the videos.[195]
OpenAI demonstrated some Sora-created high-definition videos to the public on February 15, 2024, stating that it could generate videos up to one minute long. It also shared a technical report highlighting the methods used to train the model, and the model's capabilities.[197] It acknowledged some of its shortcomings, including struggles simulating complex physics.[198] Will Douglas Heaven of the MIT Technology Review called the demonstration videos "impressive", but noted that they must have been cherry-picked and might not represent Sora's typical output.[197]
Despite skepticism from some academic leaders following Sora's public demo, notable entertainment-industry figures have shown significant interest in the technology's potential. In an interview, actor/filmmaker Tyler Perry expressed his astonishment at the technology's ability to generate realistic video from text descriptions, citing its potential to revolutionize storytelling and content creation. He said that his excitement about Sora's possibilities was so strong that he had decided to pause plans for expanding his Atlanta-based movie studio.[199]

[user question]
{query}
"""

resp = get_response(prompt)
resp

'OpenAI의 Sora 모델은 짧은 서술적인 프롬프트에 기반하여 비디오를 생성하거나 기존 비디오를 시간을 앞뒤로 확장할 수 있는 텍스트-비디오 모델입니다. 이 모델은 1920x1080 또는 1080x1920의 해상도로 비디오를 생성할 수 있으며, 생성된 비디오의 최대 길이는 알려져 있지 않습니다. Sora의 기술은 DALL·E 3 텍스트-이미지 모델의 기술을 적용한 것이며, OpenAI는 해당 모델을 공개적으로 이용 가능한 비디오와 저작권이 허가된 비디오를 사용하여 훈련시켰습니다. 또한 OpenAI는 Sora가 한 번에 1분 길이의 비디오를 생성할 수 있다고 밝히고 기술 보고서를 공유했습니다.'

## load data

In [11]:
import requests

response = requests.get(
    "http://en.wikipedia.org/w/api.php",
    params={
        "action": "query",
        "format": "json",
        "titles": "OpenAI",
        "prop": "extracts",
        "explaintext": True
    }
).json()

response

{'batchcomplete': '',
 'query': {'pages': {'48795986': {'pageid': 48795986,
    'ns': 0,
    'title': 'OpenAI',
    'extract': 'OpenAI is a U.S.-based artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".\nAs one of the leading organizations of the AI spring, it has developed several large language models, advanced image generation models, and previously, released open-source models. Its release of ChatGPT has been credited with starting the AI spring.The organization consists of the non-profit OpenAI, Inc. registered in Delaware and its for-profit subsidiary OpenAI Global, LLC. It was founded by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and 

In [12]:
page = next(iter(response['query']['pages'].values()))

text = page['extract']
text

'OpenAI is a U.S.-based artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".\nAs one of the leading organizations of the AI spring, it has developed several large language models, advanced image generation models, and previously, released open-source models. Its release of ChatGPT has been credited with starting the AI spring.The organization consists of the non-profit OpenAI, Inc. registered in Delaware and its for-profit subsidiary OpenAI Global, LLC. It was founded by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and Wojciech Zaremba, with Sam Altman and Elon Musk serving as the initial Board of Directors members. Microsoft provided OpenAI Gl

## indexing

In [13]:
def get_chunks(text, chunk_size):
    words = text.split()
    chunks = [" ". join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

    return chunks

In [14]:
chunks = get_chunks(text, 128)
chunks

['OpenAI is a U.S.-based artificial intelligence (AI) research organization founded in December 2015, researching artificial intelligence with the goal of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work". As one of the leading organizations of the AI spring, it has developed several large language models, advanced image generation models, and previously, released open-source models. Its release of ChatGPT has been credited with starting the AI spring.The organization consists of the non-profit OpenAI, Inc. registered in Delaware and its for-profit subsidiary OpenAI Global, LLC. It was founded by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and Wojciech Zaremba, with Sam Altman and Elon Musk',
 "serving as the initial Board of Directors members. Microsoft provided OpenA

In [15]:
client.embeddings.create(input=["머신러닝"], model="text-embedding-3-small")

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.008908738382160664, 0.044618140906095505, 0.017829885706305504, 0.004429554101079702, 0.004494694527238607, 0.023636694997549057, 0.017246723175048828, 0.00996339414268732, -0.04638003557920456, -0.03595755621790886, -0.0036664793733507395, 0.028686635196208954, -0.005902969744056463, 0.03637941926717758, 0.0023977907840162516, 0.033848244696855545, -0.035808663815259933, -0.03441900014877319, -0.0001786516950232908, 0.012891614809632301, -0.008890126831829548, 0.006638126913458109, 0.046231143176555634, -0.05136793479323387, -0.005496617406606674, -0.0017820579232648015, 0.03293007239699364, -0.01862397789955139, -0.007289532106369734, 0.06114521622657776, -0.0017308761598542333, -0.025051174685359, 0.01513741072267294, -0.01198585145175457, 0.008617157116532326, 0.04231030493974686, -0.01033562608063221, 0.024480419233441353, -0.02522488310933113, -0.022693708539009094, -0.009671812877058983, -0.013859416358172894, -0.046826712787

In [16]:
def get_embedding(text):
    return client.embeddings.create(input=[text], model="text-embedding-3-small").data[0].embedding

In [17]:
embeddings = [get_embedding(chunk) for chunk in chunks]
embeddings

[[-0.010375107638537884,
  0.003346505342051387,
  0.08395431935787201,
  0.012269474565982819,
  -0.005494918208569288,
  -0.02601931430399418,
  -0.0577593669295311,
  0.016810936853289604,
  -0.0014184227911755443,
  -0.0352276936173439,
  0.027876045554876328,
  -0.07050556689500809,
  -0.020549487322568893,
  -0.02943168394267559,
  -0.035378240048885345,
  0.01619620807468891,
  -0.06854847818613052,
  -0.034951694309711456,
  -0.007489648647606373,
  -0.03793751448392868,
  0.009384015575051308,
  0.0041556884534657,
  -0.026320407167077065,
  0.029682593420147896,
  -0.01563166081905365,
  -0.024802403524518013,
  -0.007878557778894901,
  0.01663530059158802,
  -0.02515367791056633,
  0.012570565566420555,
  0.022632036358118057,
  -0.00545414537191391,
  -0.03670806065201759,
  0.020273488014936447,
  -0.02511604130268097,
  0.014878933317959309,
  -0.02606949768960476,
  0.013762385584414005,
  0.02649604342877865,
  0.007583739701658487,
  0.0324425995349884,
  -0.0329193249

## retrieve

In [18]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def retriever(query, top_k):
    q_emb = get_embedding(query)
    
    sim_score = cosine_similarity([q_emb], embeddings)[0]
    max_indices = np.argsort(sim_score)[::-1][:top_k]

    retrieved_datas = [chunks[i] for i in max_indices]

    return retrieved_datas

In [19]:
# 단어의 개수로 chunking을 했기 때문에 잘리는 부분이 존재

contexts = retriever("OpenAI의 sora 모델에 대해 설명해줘", 3)
contexts

['as copyrighted videos licensed for that purpose, but did not reveal the number or the exact sources of the videos.OpenAI demonstrated some Sora-created high-definition videos to the public on February 15, 2024, stating that it could generate videos up to one minute long. It also shared a technical report highlighting the methods used to train the model, and the model\'s capabilities. It acknowledged some of its shortcomings, including struggles simulating complex physics. Will Douglas Heaven of the MIT Technology Review called the demonstration videos "impressive", but noted that they must have been cherry-picked and might not represent Sora\'s typical output.Despite skepticism from some academic leaders following Sora\'s public demo, notable entertainment-industry figures have shown significant interest in the technology\'s potential. In an interview, actor/filmmaker Tyler Perry expressed his',
 '3, a more powerful model better able to generate images from complex descriptions witho

## Generate

In [20]:
def generator(query, contexts):
    context = "\n\n".join(contexts)

    prompt = f"""
    Utilizing the given context, please answe the question

    [context]
    {context}

    [user quertion]
    {query}
    """

    return get_response(prompt)

In [21]:
generator("OpenAI의 sora 모델에 대해 알려줘", contexts)

'OpenAI의 Sora 모델은 텍스트를 기반으로 비디오를 생성할 수 있는 모델이며, 기존 비디오를 앞뒤로 확장할 수도 있습니다. Sora는 1920x1080 또는 1080x1920의 해상도를 가진 비디오를 생성할 수 있으며, 생성된 비디오의 최대 길이는 알려지지 않았습니다. Sora의 개발팀은 이를 일본어로 "하늘"을 의미하는 단어로 명명하여 "끝없는 창의적 잠재력"을 상징합니다. Sora의 기술은 DALL·E 3 텍스트에서 이미지로 변환하는 모델의 기술을 적용한 것입니다. OpenAI는 시스템을 공개적으로 이용 가능한 비디오를 사용하여 훈련시켰습니다. 현재 Sora 모델은 비판적인 피해와 리스크를 관리하기 위해 레드 팀에게 이용 가능하며, 공개에 대한 계획은 미정 상태입니다.'

## Llamaindex

- llama-index를 사용하면 위의 과정을 간단한 코드 몇줄로 구현이 가능하다.

In [22]:
from pathlib import Path

data_path = Path("../dataset/llamaindex_data")

if not data_path.exists():
    data_path.mkdir()

with open(f"{data_path}/openai.txt", "w") as fp:
    fp.write(text)

In [23]:
# SimpleDirectoryReader : 디렉토리 안에 모든 파일들을 읽어와 document를 구성

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader('../dataset/llamaindex_data').load_data()
vector_index = VectorStoreIndex.from_documents(documents)

In [24]:
query_engine = vector_index.as_query_engine()

In [25]:
response = query_engine.query(query)
print(response)

Sora is a text-to-video model developed by OpenAI that can generate videos based on short descriptive prompts and extend existing videos forwards or backwards in time. It can create videos with resolutions up to 1920x1080 or 1080x1920. The technology behind Sora is an adaptation of the DALL·E 3 text-to-image model. OpenAI trained Sora using publicly-available videos as well as copyrighted videos licensed for that purpose. The model was demonstrated generating high-definition videos up to one minute long, showcasing its ability to create realistic video content from text descriptions. Despite some skepticism, notable figures in the entertainment industry have shown significant interest in Sora's potential, with actor/filmmaker Tyler Perry expressing excitement about its capabilities to revolutionize storytelling and content creation.


- 하지만 간단하다는 것은 자유도가 떨어진다는 의미이기도 하다.
  - 문서를 어떻게 chunk 단위로 분할하였나?
  - retriever와 generator 모델은 무엇인가? (어떤 방식으로 유사도를 계산했는지와 어떤 모델을 통해 답변을 생성했는지)
  - query와 context를 결합하는 prompt는 어떻게 구성하였는가?
- 해당 부분에 대해서 커스터마이징하기 위해서는 더 복잡한 과정이 필요하다.

In [26]:
from llama_index.core import Settings

# Settings

In [27]:
# llm

# Settings._llm

In [28]:
# embedding model

# Settings._embed_model

In [29]:
# node parser

# Settings._node_parser

## Chunk size 조절하기

- llama_index에서는 Chunk를 node라고 부른다.
- document를 잘라 node로 만들 때 splitter를 사용한다.
- 위의 설정에서 본 splitter는 SentenceSplitter임을 확인할 수 있다.
- SentenceSplitter는 document를 node로 쪼갤 때 최대한 sentence를 보존하면서 쪼갠다.

In [30]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=128,
                                 chunk_overlap=10,)

nodes = text_splitter.get_nodes_from_documents(documents)
len(nodes)

185

In [31]:
nodes[0]

TextNode(id_='48648f37-881e-41db-955a-4c252d7b2ccd', embedding=None, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1b230006-988e-4752-95bc-b6051a58ba62', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'appli

- TokenTextSplitter는 document를 token 단위로 나누는 splitter이다.
- SentenceSplitter와는 달리 sentence를 보존하지 않는다.

In [32]:
from llama_index.core.node_parser import TokenTextSplitter

text_splitter = TokenTextSplitter(chunk_size=128,
                                  chunk_overlap=10,
                                  include_metadata=False,)

nodes = text_splitter.get_nodes_from_documents(documents)
len(nodes)

161

In [33]:
nodes[0]

TextNode(id_='cba613f5-cafc-4906-997a-71c59a0782ce', embedding=None, metadata={}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1b230006-988e-4752-95bc-b6051a58ba62', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, hash='16a29fa19e6871f00d11273a3307bade7fa39774de58f06189a5a20ad7e8052b'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='8bf5c2ec-e758-447a-b90e-53b83a2783de', node_type=<ObjectType.TEXT: '

In [34]:
# node 리스트에 직접 정보를 추가할 수도 있다. 잘못된 정보를 추가해보자.

from llama_index.core.schema import TextNode

node_new = TextNode(text = "OpenAI의 sora는 Text to Video 모델로, 생성할 수 있는 비디오는 오직 2D 애니메이션뿐이다. 대표적인 창작물로는 NLP에 대한 소개 애니메이션이 있다.")

nodes.append(node_new)

In [35]:
vector_index = VectorStoreIndex(nodes)

In [36]:
query_engine = vector_index.as_query_engine()
response = query_engine.query(query)

response.response

'The Sora model by OpenAI is a Text to Video model that is capable of generating only 2D animations. One of its notable creations is an animated introduction to NLP.'

- llamaindex의 SimpleDiretoryReader는 텍스트뿐만 아니라 이미지, 마크다운, pdf 등 다양한 포맷의 파일을 쉽게 처리해준다.

In [37]:
# gpt에게 Mistral AI가 어느 회사인지에 대해 질문
# Mistral AI는 프랑스 회사임.

get_response("Mistral AI는 어느 국적의 회사야?")

'Mistral AI는 영국의 회사입니다.'

In [38]:
# DirectoryReader를 통해 Mistral AI의 Wikipedia pdf를 읽어 contexts로 활용.

documents = SimpleDirectoryReader("../dataset/llamaindex_data").load_data()
vector_index = VectorStoreIndex.from_documents(documents)
query_engine = vector_index.as_query_engine()

In [39]:
query_engine.query("Mistral AI는 어느 국적의 회사야?").response

'프랑스'

In [40]:
query_engine.query("Mistral AI는 어느 국적의 회사야?")

Response(response='프랑스', source_nodes=[NodeWithScore(node=TextNode(id_='06919071-3395-4604-87f2-b996a8c81e17', embedding=None, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9e5156c4-4901-4714-a0df-c5a0e5c22844', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamai

## Embedding 모델 바꿔보기

In [41]:
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimension=128)
embed_model.get_text_embedding('llamaindex는 좋아')

[0.05708788335323334,
 -0.05078136920928955,
 -0.000506943033542484,
 -0.02057848498225212,
 0.03262164816260338,
 -0.034065306186676025,
 -0.030570130795240402,
 -0.008908901363611221,
 -0.023313840851187706,
 -0.01710863597691059,
 -0.008579645305871964,
 0.00837702676653862,
 0.017146626487374306,
 -0.02854394167661667,
 0.06240662932395935,
 0.012745996937155724,
 -0.02725224569439888,
 -0.046728990972042084,
 -0.0010510856518521905,
 0.04158753529191017,
 -0.001457906560972333,
 -0.015044455416500568,
 -0.012473727576434612,
 -0.011903862468898296,
 0.021996816620230675,
 0.010447538457810879,
 0.01884355954825878,
 -0.02300991117954254,
 0.046906281262636185,
 -0.011745565570890903,
 0.049768272787332535,
 -0.03322950378060341,
 0.011853206902742386,
 -0.016804708167910576,
 -0.041359588503837585,
 0.02707495354115963,
 0.022883275523781776,
 0.011555611155927181,
 0.003148508258163929,
 -0.00833270326256752,
 -0.005195275880396366,
 -0.0013748010387644172,
 0.0289745070040226,
 

- 기본 Embedding 모델 외에 다른 모델을 RAG 파이프라인에 적용하고 싶으면 단순히 Setting의 embed_model을 변경하면 된다.

In [42]:
Settings.embed_model = embed_model

- Retriever를 정의할 때 어떤 Index로부터 만들지를 정의할 수도 있지만 얼마만큼의 문서를 retrieve할지도 설정할 수 있다.
- Default Retriever는 `VectorIndexRetriever`이며 chunk의 embedding으로부터 현재 query와 유사한 것을 골라내는 가장 기본적인 Retriever이다.

In [43]:
from llama_index.core.retrievers import VectorIndexRetriever

retriever = VectorIndexRetriever(
    index=vector_index,
    similarity_top_k=3,
)

In [44]:
nodes = retriever.retrieve("Mistral AI는 어느 국적의 회사야?")

In [45]:
nodes

[NodeWithScore(node=TextNode(id_='06919071-3395-4604-87f2-b996a8c81e17', embedding=None, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9e5156c4-4901-4714-a0df-c5a0e5c22844', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf',

- 또 다른 Retriever에는 `DocumentSummaryRetriever`가 있다.
- DocumentSummaryRetriever는 DocumentSummaryIndex와 함께 동작한다.
- DocumentSummaryIndex는 말그대로 문서를 요약하여 저장하기 때문에 LLM이 필요하다.

In [46]:
from llama_index.core import DocumentSummaryIndex

chatgpt = OpenAI(model='gpt-3.5-turbo')

doc_summary_index = DocumentSummaryIndex.from_documents(
    documents,
    llm=chatgpt,
    show_progress=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes:   0%|          | 0/6 [00:00<?, ?it/s]

Parsing nodes: 100%|██████████| 6/6 [00:00<00:00, 110.37it/s]
Summarizing documents:   0%|          | 0/6 [00:00<?, ?it/s]

current doc id: 9e5156c4-4901-4714-a0df-c5a0e5c22844


Summarizing documents:  17%|█▋        | 1/6 [00:03<00:15,  3.04s/it]

current doc id: 837208bd-8cf8-4431-aae2-24eff4176beb


Summarizing documents:  33%|███▎      | 2/6 [00:05<00:11,  2.96s/it]

current doc id: 77797452-8947-48a8-a72c-0e9050211168


Summarizing documents:  50%|█████     | 3/6 [00:10<00:10,  3.49s/it]

current doc id: 9cd0dcfd-deb1-40dd-b80f-f9ce7b3a48ff


Summarizing documents:  67%|██████▋   | 4/6 [00:13<00:06,  3.49s/it]

current doc id: 66404d15-a825-4176-9b01-50a958b17951


Summarizing documents:  83%|████████▎ | 5/6 [00:15<00:02,  2.86s/it]

current doc id: ad42862d-905e-43dc-a15d-056c9c561a03


Summarizing documents: 100%|██████████| 6/6 [00:32<00:00,  5.34s/it]
Generating embeddings: 100%|██████████| 6/6 [00:00<00:00,  7.03it/s]


In [48]:
doc_summary_index.get_document_summary(documents[0].id_)

"The provided text is about Mistral AI, a French company founded in April 2023 by Arthur Mensch, Guillaume Lample, and Timothée Lacroix. Mistral AI specializes in selling artificial intelligence products, particularly open-source large language models. The company has released models such as Mistral 7B and Mixtral 8x7B, with varying numbers of parameters and capabilities. Mistral AI has received significant funding and recognition in the AI industry, with notable investors and successful fundraising rounds.\n\nSome questions that this text can answer include:\n- When was Mistral AI founded and by whom?\n- What are some of the products offered by Mistral AI?\n- How has Mistral AI been valued and funded since its inception?\n- What are the key features of Mistral AI's language processing models?\n- Who are some of the investors and partners involved with Mistral AI?\n- How does Mistral AI's technology compare to competitors in the AI industry?"

- `DocumentSummaryIndexLLMRetriever`는 요약된 내용을 LLM에 전달하여 어떤 문서를 retrieve할지 정하는 모델이다.

In [58]:
from llama_index.core.indices.document_summary import DocumentSummaryIndexLLMRetriever

retriever = DocumentSummaryIndexLLMRetriever(
    index=doc_summary_index,
    choice_batch_size=3,
)

retriever.retrieve("Mistral AI는 어느 국가에 속해 있어?")

[NodeWithScore(node=TextNode(id_='bcaf322b-2533-4bf5-b550-0dda66892111', embedding=None, metadata={'page_label': '4', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9cd0dcfd-deb1-40dd-b80f-f9ce7b3a48ff', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '4', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf',

- `DocumentSummaryIndexEmbeddingRetriever`은 요약된 내용을 Embedding을 통해 어떤 문서를 retrieve할지 정하는 모델이다.

In [59]:
from llama_index.core.indices.document_summary import DocumentSummaryIndexLLMRetriever

retriever = DocumentSummaryIndexLLMRetriever(
    index=doc_summary_index,
    similarity_top_k=3,
)

retriever.retrieve("Mistral AI는 어느 국가에 속해 있어?")

[NodeWithScore(node=TextNode(id_='65f90e7d-cfe9-4220-8dd0-43cd3927af14', embedding=None, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf', 'file_type': 'application/pdf', 'file_size': 429302, 'creation_date': '2024-04-10', 'last_modified_date': '2024-04-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9e5156c4-4901-4714-a0df-c5a0e5c22844', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Mistral AI - Wikipedia.pdf', 'file_path': '/Users/choi/Desktop/rag-with-llamaindex/notebook/../dataset/llamaindex_data/Mistral AI - Wikipedia.pdf',

## Generator

- llamaindex가 자유도가 상대적으로 높은 프레임워크인 이유에는 generator도 여러 모델을 사용하여 custumizing이 가능하기 때문이다.

In [60]:
from llama_index.core import get_response_synthesizer

response_synthesizer = get_response_synthesizer(response_mode="compact")

In [61]:
query = "Mistral AI는 어느 국가에 속해 있어?"

response = response_synthesizer.synthesize(query, nodes=retriever.retrieve(query)) 
response.response

'프랑스'

In [62]:
# accumulate mode
# retriever에서 3개의 chunk를 retrieve하고, 각 chunk에 대해서 여러 개의 response를 생성한다.

response_synthesizer = get_response_synthesizer(response_mode="accumulate")

response = response_synthesizer.synthesize(query, nodes=retriever.retrieve(query)) 
response.response

'Response 1: 프랑스'

In [63]:
# simple_summarize mode
# 받은 청크들을 한번에 LLM에 전달하는 context에 들어갈 수 있도록 truncate한다.
# 정보 손실이 있을 수 있음.

response_synthesizer = get_response_synthesizer(response_mode="simple_summarize")

response = response_synthesizer.synthesize(query, nodes=retriever.retrieve(query)) 
response.response

'프랑스'

In [65]:
# tree_summarize mode
# 트리 방식으로 노드가 하나가 남을 때까지 summarize 하는 방식으로 응답을 생성한다.
# chunk를 결합하여 query에 대한 답변을 생성하도록 하며 생성된 답변을 다시 새로운 chunk로 삼아 query에 대한 답변을 새로 생성하는 작업을 반복한다.
# query에 대한 답변이 단 하나만 생성될 때까지 반복한다.

response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

response = response_synthesizer.synthesize(query, nodes=retriever.retrieve(query)) 
response.response

'Mistral AI는 프랑스에 속해 있습니다.'

In [66]:
response_synthesizer.get_prompts()

{'summary_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.SUMMARY: 'summary'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.SUMMARY: 'summary'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information from multiple sources is below.\n---------------------\n{context_str}\n---------------------\nGiven the information from multiple sources and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x12f5c11b0>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.SYST

- `get_prompt()` 메소드를 통해 어떤 prompt를 사용했는지 확인할 수 있다.
- llamaindex에서는 prompt 역시 원하는 방식으로 변경할 수 있다.

In [67]:
prompts_dict = query_engine.get_prompts()

for prompt in prompts_dict.values():
    print(prompt.get_template())
    print("****************\n\n")

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 
****************


The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
****************




In [69]:
# prompt 변경하기
from llama_index.core import PromptTemplate

new_prompt_qa = """
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Use Proper Korean, answer in the style of kindergarden teacher, gentle and enthusiastic.
Query: {query_str}
Answer: 
"""

new_prompt_refine = """
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Use Proper Korean, answer in the style of kindergarden teacher, gentle and enthusiastic.
Refined Answer: 
"""

In [70]:
new_prompt_qa = PromptTemplate(new_prompt_qa)
new_prompt_refine = PromptTemplate(new_prompt_refine)

In [73]:
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template" : new_prompt_qa,
     "response_synthesizer:refine_template" : new_prompt_refine}
)

In [74]:
prompts_dict = query_engine.get_prompts()

for prompt in prompts_dict.values():
    print(prompt.get_template())
    print("****************\n\n")


Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Use Proper Korean, answer in the style of kindergarden teacher, gentle and enthusiastic.
Query: {query_str}
Answer: 

****************



The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Use Proper Korean, answer in the style of kindergarden teacher, gentle and enthusiastic.
Refined Answer: 

****************




In [75]:
response = query_engine.query(query)
response.response

'Mistral AI는 프랑스에 속해 있어요! 멋진 인공지능 제품을 만드는 프랑스 회사랍니다. 함께 알아보면 재미있어요! 🇫🇷✨'