In [98]:
import os, openai

In [99]:
# Read the OpenAI API key from a local text file and export it through the
# process environment so the key itself never appears in the notebook.
with open('../sample_data/token.txt', 'r') as token_file:
    os.environ["OPENAI_API_KEY"] = token_file.read().strip()

In [100]:
# Hand the key to the openai module explicitly (None if the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Llama index

## Creating Index

In [101]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

> Example document: https://www.worldarchery.sport/rulebook/article/1
> 세계양궁협회 제1규정 Constitution and Procedures

In [102]:
# Location of the sample PDF, relative to the notebook directory.
book_path = "../sample_data/EN-Book1.pdf"

In [103]:
# Load the PDF into Document objects (the reader is asked for 4 workers).
pdf_reader = SimpleDirectoryReader(input_files=[book_path])
documents = pdf_reader.load_data(num_workers=4)

# Build a vector-store index over the documents and expose it as a query engine.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

In [104]:
# Ask for a high-level description of the document.
question = "What is this text about?"
response = query_engine.query(question)
print(response)

The text provides information about the rules and procedures related to violations, sanctions, and the decision-making process by the Board of Justice and Ethics in the context of a regulatory framework for handling misconduct or breaches of rules within a specific organization or association.


In [105]:
# Same kind of question, asked in Korean.
question = "이건 어떤 문서이냐?"
response = query_engine.query(question)
print(response)

이 문서는 월드 아처리의 국제 심판들에 대한 규정과 절차에 관한 내용을 다루고 있습니다.


In [106]:
# Ask who the document is about.
question = "who is this text about?"
response = query_engine.query(question)
print(response)

The text is about the roles and responsibilities of the Vice Presidents and the First Vice President within World Archery.


In [107]:
# Ask for the publication date.
question = "when was this book published"
response = query_engine.query(question)
print(response)

The book was published with the information provided in the context.


In [108]:
# Ask for a five-point summary.
question = "list 5 important points from this book"
response = query_engine.query(question)
print(response)

1. The Executive Board is responsible for governance and supervision of World Archery affairs, including approving budgets, appointing key positions, and deciding on sanctions and doping matters.
2. The Executive Board may call an extraordinary Congress when necessary and make decisions by a simple majority vote.
3. The Executive Committee consists of the President, First Vice President, and one Vice President, with any two forming a quorum.
4. The Executive Committee handles routine business and ensures proper accounting practices are maintained.
5. Vacancies in the Executive Board are filled based on specific criteria, and decisions made by the Executive Board are circulated to all Members and officers.


# Saving and Loading Index

In [109]:
# Write the index's storage context to the "judge_index" directory.
index.storage_context.persist(persist_dir="judge_index")

In [110]:
from llama_index.core import StorageContext, load_index_from_storage

In [111]:
# Point a fresh storage context at the directory persisted earlier ...
storage_context = StorageContext.from_defaults(
    persist_dir="judge_index",
)
# ... and rebuild the index from what is stored there.
new_index = load_index_from_storage(storage_context)

In [112]:
# Query the index that was loaded back from disk.
new_query_engine = new_index.as_query_engine()
question = "who is this text about?"
response = new_query_engine.query(question)
print(response)

The text is about the roles and responsibilities of the Vice Presidents and the First Vice President within World Archery.


# Customizing LLMs

## LLM 모델을 선택할 수 있는 기능

In [113]:
from llama_index.core import Settings
from llama_index.llms.langchain import LangChainLLM
from langchain_openai import ChatOpenAI

In [114]:
# Configure a LangChain chat model (gpt-4o, temperature 0) and wrap it in
# LlamaIndex's LangChainLLM adapter.
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o")
llm_predictor = LangChainLLM(llm=chat_model)

In [115]:
# Register the LangChain-wrapped model as the global default LLM.
# `Settings` exposes the default model through its `llm` attribute. Assigning
# to `Settings.llm_predictor` only creates an unused attribute, so the
# library's default LLM would stay in effect.
Settings.llm = llm_predictor

In [116]:
# Build a separate index for the custom-LLM demo.
# No `llm_predictor` keyword is passed: `from_documents` does not recognise it
# (it is swallowed as an extra keyword argument and ignored). The LLM is chosen
# at query time, via `Settings.llm` or `as_query_engine(llm=...)`.
custom_llm_index = VectorStoreIndex.from_documents(documents)

In [117]:
# Pass the custom LLM to the query engine explicitly so this cell really
# answers with gpt-4o, whatever the global default LLM happens to be.
custom_llm_query_engine = custom_llm_index.as_query_engine(llm=llm_predictor)
response = custom_llm_query_engine.query("who is this text about?")
print(response)

The text is about the roles and responsibilities of the Vice Presidents within World Archery, particularly focusing on the First Vice President and the process for filling that position in case of vacancy or the First Vice President becoming the President.


# Customizing Prompt

## 답변을 주는 프롬프트를 커스터마이징할 수 있는 기능

In [118]:
from llama_index.core import PromptTemplate

In [119]:
# Question-answering prompt: the retrieved context sits between two dashed
# rules, followed by the instruction and the user's question. It contains the
# placeholders {context_str} and {query_str}.
template = (
    "We have provided context information below. \n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given this information, please answer the question and each answer should start with code word AI Demos: {query_str}\n"
)

In [120]:
# Wrap the raw format string in a PromptTemplate.
qa_template = PromptTemplate(template=template)

In [121]:
# Query engine that uses the custom QA prompt instead of the default one.
query_engine = custom_llm_index.as_query_engine(
    text_qa_template=qa_template,
)
question = "who is this text about?"
response = query_engine.query(question)
print(response)

AI Demos: This text is about the roles and responsibilities of the Vice Presidents in World Archery, specifically the First Vice President.


# Custom Embedding

In [122]:
# Install the package that provides `llama_index.embeddings.langchain`.
# NOTE(review): the version is unpinned and the install runs mid-notebook; consider
# pinning it and moving it to a setup cell at the top.
%pip install llama-index-embeddings-langchain

Note: you may need to restart the kernel to use updated packages.


In [123]:
from langchain_huggingface import HuggingFaceEmbeddings
from llama_index.core import Settings
from llama_index.embeddings.langchain import LangchainEmbedding

In [124]:
# Create the Hugging Face sentence-transformers model through LangChain,
# wrap it for LlamaIndex, and register it as the global default embedding model.
hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
embed_model = LangchainEmbedding(hf_embeddings)
Settings.embed_model = embed_model

In [125]:
# Re-index the documents with the custom embedding model.
# This rebinds `new_index`, which previously held the index loaded from disk.
new_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

In [126]:
# Query the index that was built with the custom embeddings.
query_engine = new_index.as_query_engine()
question = "list 5 important points from this book"
response = query_engine.query(question)
print(response)

1. The Executive Board is responsible for governance and supervision of World Archery affairs, including approving budgets, appointing key positions, and deciding on sanctions and doping matters.
2. The Executive Board may call an extraordinary Congress when necessary and make decisions by a simple majority vote.
3. The Executive Committee consists of the President, First Vice President, and one Vice President, with any two forming a quorum for decision-making.
4. Vacancies in the Executive Board may be filled by the next highest vote recipient if they meet certain criteria, or by appointment if necessary.
5. The decisions of Executive Board meetings, including budget approvals, are circulated to all Members and officers for transparency.


In [127]:
# Ask about judges using a fresh query engine over the same index.
query_engine = new_index.as_query_engine()
question = "what about judge"
response = query_engine.query(question)
print(response)

