In [4]:
!pip install llama_hub
!pip install llama_index
!pip install packaging==23.2
!pip install torch sentence-transformers
!pip install trafilatura
!pip install torch sentence-transformers
!pip install llama-index-readers-web
!pip install llama-index-embeddings-huggingface



In [5]:
! pip install docx2txt



In [6]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="/content/NVDA")
data = reader.load_data()
print(f"Loaded {len(data)} docs")

Loaded 169 docs


In [7]:
# Collect every document into a flat list, applying a per-document transform.
# Each item yielded by reader.iter_data() is itself an iterable of documents.
# The loop variable is deliberately NOT called `data`, so the `data` list loaded
# in the previous cell is not overwritten.
docs = []
for file_docs in reader.iter_data():
    for doc in file_docs:
        # Example transform: upper-case the document text in place.
        doc.text = doc.text.upper()
        docs.append(doc)

print(len(docs))

169


In [8]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.llms.openai import OpenAI

# Define the sentence-window node parser.
# The context around each sentence is stored under the "window" metadata key and
# the sentence itself under "original_text".
# NOTE(review): the original note said window_size=3 yields the 3 sentences before
# the current one, the current sentence, and the 2 sentences after it (6 in total).
# The sample metadata printed for node 100 further down shows 3 sentences on each
# side (7 in total) -- confirm against the installed llama_index version.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

node_parser

SentenceWindowNodeParser(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7d544519aad0>, id_func=<function default_id_func at 0x7d55162c0ca0>, sentence_splitter=<function split_by_sentence_tokenizer.<locals>.split at 0x7d544585b6d0>, window_size=3, window_metadata_key='window', original_text_metadata_key='original_text')

In [9]:
sentence_nodes = node_parser.get_nodes_from_documents(docs)

len(sentence_nodes)

3048

In [10]:
sentence_nodes[100].metadata

{'window': 'THESE SERVICES AND PLATFORMS CAN BE \nAVAILABLE IN A COMPANY’S ON-PREMISES \nDATA CENTERS OR THROUGH THE WORLD’S \nLEADING CLOUD SERVICE PROVIDERS.\n NVIDIA AI FOUNDATIONS IS A CLOUD \nSERVICE—A FOUNDRY—FOR BUILDING CUSTOM \nLANGUAGE MODELS AND GENERATIVE \nAI.  IT IS COMPRISED OF LANGUAGE, \nVISUAL, AND BIOLOGY MODEL-MAKING \nSERVICES.  CUSTOMERS CAN USE NVIDIA \nAI FOUNDATIONS TO CREATE, REFINE, AND \nOPERATE CUSTOM LLMS AND GENERATIVE \nAI TRAINED WITH THEIR PROPRIETARY DATA \nAND FOR THEIR DOMAIN-SPECIFIC TASKS.\n THE PHARMACEUTICAL INDUSTRY IS A TOP \nFOCUS OF NVIDIA AI FOUNDATIONS.  IT CAN \nCOST $2 BILLION AND TAKE 10-15 YEARS OF \nRESEARCH TO BRING A NEW DRUG TO MARKET. \n USING OUR SERVICE AND WORKING WITH OUR \nRESEARCHERS, CUSTOMERS CAN USE OUR \nPRETRAINED AND OPTIMIZED BIOLOGY LLMS \nTO ACCELERATE EARLY-STAGE DRUG DISCOVERY \nWORKFLOWS FROM MONTHS TO WEEKS. \n',
 'original_text': 'CUSTOMERS CAN USE NVIDIA \nAI FOUNDATIONS TO CREATE, REFINE, AND \nOPERATE CUSTOM

In [11]:
from llama_index.core import ServiceContext,Document,VectorStoreIndex
from llama_index.core.embeddings import resolve_embed_model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os
from getpass import getpass

# Never hard-code credentials in a notebook. Read the OpenAI key from the
# environment and prompt for it (without echo) only when it is missing.
# The key that used to be embedded here must be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

llm = OpenAI(model="gpt-3.5-turbo",
             api_key=os.environ["OPENAI_API_KEY"],
             temperature=0.1)

# Embedding model resolved from a "local:" model spec.
embed_model = resolve_embed_model("local:BAAI/bge-small-en")

sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    node_parser=node_parser,
)

# Create the vector index: all document texts are joined into one Document
# before being handed to the index.
document = Document(text="\n\n".join([doc.text for doc in docs]))
sentence_index = VectorStoreIndex.from_documents(
    [document],
    service_context=sentence_context
)

Access to the secret `HF_TOKEN` has not been granted on this notebook.
You will not be requested again.
Please restart the session if you want to be prompted again.
  sentence_context = ServiceContext.from_defaults(


In [12]:
#将向量数据库保存在本地
import os
from llama_index.core import StorageContext, load_index_from_storage

# Round-trip the index through local storage: write it to disk, then read it back.
# The former `if not os.path.exists(...)` rebuild branch could never execute,
# because the directory is always created by the persist() call right before
# the check, so the cell now does explicitly what it always did in practice.
sentence_index.storage_context.persist(persist_dir="./sentence_index")

# Load the vector index back from local storage.
sentence_index = load_index_from_storage(
    StorageContext.from_defaults(persist_dir="./sentence_index"),
    service_context=sentence_context
)

In [13]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor,SentenceTransformerRerank

# Create the metadata-replacement component.
# A query engine takes postprocessor components as a parameter, and several
# postprocessors can be combined together.

# The replacement component decides (through target_metadata_key) which piece of
# the retrieved context is passed on to the LLM -- here the "window" metadata,
# i.e. the sentence together with its surrounding sentences.
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)

# Create the rerank component.
# Reference: https://huggingface.co/BAAI/bge-reranker-base
# It re-orders the retrieved context to obtain a more precise retrieval result
# and keeps the top_n best entries.
# NOTE(review): the original note said the replacement component forwards the
# reranker's output to the LLM; the actual order depends on the list passed to
# the query engine -- confirm there.
rerank = SentenceTransformerRerank(
    top_n=2,
    model="BAAI/bge-reranker-base"
)

In [14]:
# Create the query engine.
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6, # each retrieval returns the 6 most similar nodes
    node_postprocessors=[postproc, rerank]
    # NOTE(review): the original note said the reranker re-orders the 6 nodes and
    # keeps the best 2, after which the replacement component sends their "window"
    # data to the LLM. The list above puts the replacement component first, so
    # presumably the windows are substituted before reranking -- confirm.
)

In [15]:
window_response = sentence_window_engine.query(
    "Highlight NVIDIA's performance in its Compute & Networking and Graphics segments. How did the revenue and operating income from these segments change compared to the previous year?"
)
print(window_response)

NVIDIA's performance in its Compute & Networking and Graphics segments showed a shift in revenue and operating income compared to the previous year. In the year ended January 29, 2023, the Compute & Networking segment had revenue of $15,068 million and operating income of $5,083 million, while the Graphics segment had revenue of $11,906 million and operating income of $4,552 million. This represented a change from the previous year, where the Compute & Networking segment had revenue of $11,046 million and operating income of $4,598 million, and the Graphics segment had revenue of $15,868 million and operating income of $8,492 million.


In [16]:
window = window_response.source_nodes[0].node.metadata["window"]
sentence = window_response.source_nodes[0].node.metadata["original_text"]

print("------------------")
print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

------------------
Window: THESE PROCEDURES 
ALSO INCLUDED, AMONG OTHERS, TESTING MANAGEMENT’S PROCESS FOR DEVELOPING THE PROVISIONS FOR EXCESS OR OBSOLETE 
INVENTORIES AND EXCESS PRODUCT PURCHASE COMMITMENTS; EVALUATING THE APPROPRIATENESS OF MANAGEMENT’S APPROACH; 
TESTING THE COMPLETENESS AND ACCURACY OF UNDERLYING DATA USED IN THE APPROACH; AND EVALUATING THE REASONABLENESS OF 
MANAGEMENT’S ASSUMPTIONS RELATED TO FUTURE DEMAND AND MARKET CONDITIONS.  EVALUATING MANAGEMENT’S ASSUMPTIONS 
RELATED TO FUTURE DEMAND AND MARKET CONDITIONS INVOLVED EVALUATING WHETHER THE ASSUMPTIONS USED BY MANAGEMENT WERE 
REASONABLE CONSIDERING (I) CURRENT AND PAST RESULTS, INCLUDING HISTORICAL PRODUCT LIFE CYCLE, (II) THE CONSISTENCY WITH EXTERNAL 
MARKET AND INDUSTRY DATA, AND (III) CHANGES IN TECHNOLOGY.
 /S/ PRICEWATERHOUSECOOPERS LLP
SAN JOSE, CALIFORNIA
FEBRUARY 24, 2023
WE HAVE SERVED AS THE COMPANY’S AUDITOR SINCE 2004. 
 45

NVIDIA CORPORATION AND SUBSIDIARIES
CONSOLIDATED STATEMENTS OF INCOME


In [17]:
window = window_response.source_nodes[1].node.metadata["window"]
sentence = window_response.source_nodes[1].node.metadata["original_text"]

print("------------------")
print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

------------------
Window: THERE IS NO INTERSEGMENT REVENUE.  THE ACCOUNTING POLICIES FOR SEGMENT REPORTING ARE THE SAME 
AS FOR OUR CONSOLIDATED FINANCIAL STATEMENTS.  THE TABLE BELOW PRESENTS DETAILS OF OUR REPORTABLE SEGMENTS AND THE “ALL 
OTHER” CATEGORY.
  COMPUTE & 
NETWORKING GRAPHICS ALL OTHER CONSOLIDATED
(IN MILLIONS)
YEAR ENDED JANUARY 29, 2023 :      
REVENUE $ 15,068 $ 11,906 $ — $ 26,974 
OPERATING INCOME (LOSS) $ 5,083 $ 4,552 $ (5,411) $ 4,224 
YEAR ENDED JANUARY 30, 2022 :      
REVENUE $ 11,046 $ 15,868 $ — $ 26,914 
OPERATING INCOME (LOSS) $ 4,598 $ 8,492 $ (3,049) $ 10,041 
YEAR ENDED JANUARY 31, 2021 :      
REVENUE $ 6,841 $ 9,834 $ — $ 16,675 
OPERATING INCOME (LOSS) $ 2,548 $ 4,612 $ (2,628) $ 4,532 NVIDIA CORPORATION AND SUBSIDIARIES
NOTES TO THE CONSOLIDATED FINANCIAL STATEMENTS
(CONTINUED)
72

YEAR ENDED
JANUARY 29,
2023JANUARY 30,
2022JANUARY 31,
2021
(IN MILLIONS)
RECONCILING ITEMS INCLUDED IN "ALL OTHER" CATEGORY:
STOCK-BASED COMPENSATION EXPENSE $ (2,710)

# 觀察：看到LLM給的response是總結了兩個context的視窗資料內容後得到的

In [18]:
import os

from llama_index.core import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
    Document
)
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms.openai import OpenAI


def build_sentence_window(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en",
    save_dir="merging_index",
    window_sizes= 3
):
    """Build a sentence-window vector index, or reload it when already on disk.

    Args:
        documents: Iterable of loaded documents; each must expose ``.text``.
        llm: LLM placed in the service context attached to the index.
        embed_model: Embedding model (spec) placed in the service context.
        save_dir: Directory the index is persisted to and reloaded from.
            NOTE(review): the default "merging_index" looks like a copy/paste
            leftover; it is kept for backward compatibility, so callers should
            pass an explicit directory.
        window_sizes: ``window_size`` handed to ``SentenceWindowNodeParser``.

    Returns:
        The freshly built index when ``save_dir`` does not exist, otherwise the
        index loaded from ``save_dir``.
    """
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_sizes,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    if not os.path.exists(save_dir):
        # Index the documents that were passed in. Previously this read a
        # notebook-level global named `document` and ignored the argument.
        # The texts are joined into one Document, as the earlier index-building
        # cell does, so existing callers get the same index content.
        merged_document = Document(
            text="\n\n".join(doc.text for doc in documents)
        )
        sentence_index = VectorStoreIndex.from_documents(
            [merged_document], service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        # Load the vector index from local storage.
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context
        )
    return sentence_index


def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    """Create a query engine on top of a sentence-window index.

    Args:
        sentence_index: Index whose ``as_query_engine`` method is used.
        similarity_top_k: Number of nodes requested from the retriever.
        rerank_top_n: ``top_n`` given to the BAAI/bge-reranker-base reranker.

    Returns:
        The query engine, configured with a "window" metadata replacement
        postprocessor followed by the reranker.
    """
    window_replacer = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        model="BAAI/bge-reranker-base",
        top_n=rerank_top_n,
    )
    return sentence_index.as_query_engine(
        node_postprocessors=[window_replacer, reranker],
        similarity_top_k=similarity_top_k,
    )

index = build_sentence_window(
    docs,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./sentence_index",
)

sentence_window_query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)

# Query the engine that was just built by the helper functions. Previously this
# called `sentence_window_engine`, a leftover from an earlier cell, so the
# helpers above were never actually exercised.
sentence_window_response = sentence_window_query_engine.query(
    """Examine the growth in NVIDIA's key market platforms: Data Center, Gaming, Professional Visualization, and Automotive. Which platform experienced the highest growth, and what factors contributed to this growth?"""
)
print(sentence_window_response)


  sentence_context = ServiceContext.from_defaults(


The platform that experienced the highest growth for NVIDIA was the Data Center platform. This growth can be attributed to the sustained demand for exceptional 3D graphics and the scale of the gaming market, which allowed NVIDIA to leverage its GPU architecture to create platforms for scientific computing, artificial intelligence, data science, autonomous vehicles, robotics, metaverse, and 3D internet applications.


# Auto-merging Retrieval

In [19]:
from llama_index.core.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)
# The document hierarchy is configured as [2048, 512, 128], which means every
# leaf node has a chunk_size of 128.
# Note: this rebinds `node_parser`, which earlier cells bound to the
# sentence-window parser.
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=[2048, 512, 128])
nodes = node_parser.get_nodes_from_documents(docs)
len(nodes)

3170

In [20]:
nodes[50]

TextNode(id_='fa096260-0018-4e71-b018-e41d045b0ea6', embedding=None, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='51a2b1a3-cb16-4636-be68-9a9b43fca05f', node_type=<ObjectType.TEXT: '1'>, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, hash='51e390abfbb1a2125e0a4e

In [21]:
from llama_index.core.node_parser import get_leaf_nodes

leaf_nodes = get_leaf_nodes(nodes)
leaf_nodes[30]

TextNode(id_='a8113b5c-5c7e-498a-b6e4-5da02cef5bec', embedding=None, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='fa096260-0018-4e71-b018-e41d045b0ea6', node_type=<ObjectType.TEXT: '1'>, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, hash='232431b6385ea54640812b

In [22]:
nodes_by_id = {node.node_id: node for node in nodes}

parent_node = nodes_by_id[leaf_nodes[30].parent_node.node_id]
parent_node

TextNode(id_='fa096260-0018-4e71-b018-e41d045b0ea6', embedding=None, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='51a2b1a3-cb16-4636-be68-9a9b43fca05f', node_type=<ObjectType.TEXT: '1'>, metadata={'page_label': '14', 'file_name': '2023-Annual-Report-1.pdf', 'file_path': '/content/NVDA/2023-Annual-Report-1.pdf', 'file_type': 'application/pdf', 'file_size': 41968287, 'creation_date': '2024-03-24', 'last_modified_date': '2024-03-24'}, hash='51e390abfbb1a2125e0a4e

In [23]:
import os
from getpass import getpass

# Create the LLM.
# Never hard-code credentials in a notebook. Read the OpenAI key from the
# environment and prompt for it (without echo) only when it is missing.
# The key that used to be embedded here must be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

llm = OpenAI(model="gpt-3.5-turbo",
             api_key=os.environ["OPENAI_API_KEY"],
             temperature=0)

# Create the ServiceContext.
# NOTE(review): this is the Chinese bge model, while the corpus and questions are
# English and the helper functions in this notebook default to
# "local:BAAI/bge-small-en". An index persisted here must be reloaded with this
# same embedding model -- consider aligning the models.
auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-zh-v1.5",
    node_parser=node_parser,
)

# Create the vector index: every node goes into the docstore, while only the
# leaf nodes are passed to the vector index.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)
automerging_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
    service_context=auto_merging_context
)

# Persist the index to disk.
automerging_index.storage_context.persist(persist_dir="./merging_index")

  auto_merging_context = ServiceContext.from_defaults(


In [24]:
import os
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.core import load_index_from_storage

# Build-or-load guard for the auto-merging index stored under "./merging_index".
if not os.path.exists("./merging_index"):
    # Nothing on disk yet: put all nodes into a fresh docstore ...
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    # ... and build the vector index from the leaf nodes only.
    automerging_index = VectorStoreIndex(
            leaf_nodes,
            storage_context=storage_context,
            service_context=auto_merging_context
        )

    # Persist so that later runs take the `else` branch.
    automerging_index.storage_context.persist(persist_dir="./merging_index")
else:
    # The directory already exists: reload the index with the same service context.
    automerging_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./merging_index"),
        service_context=auto_merging_context
    )

In [25]:
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Base retriever for auto-merging retrieval. similarity_top_k is set to 12, so
# each retrieval returns the 12 most relevant nodes (context).
base_retriever = automerging_index.as_retriever(
    similarity_top_k=12
)

retriever = AutoMergingRetriever(
    base_retriever,
    automerging_index.storage_context,
    verbose=True
)
# verbose=True prints the intermediate results of the retrieval.
# Author's observation (not verified here): a parent node holds at most 4 leaf
# nodes (given the hierarchy above); when 3 of its leaves are retrieved the parent
# is returned to the LLM as context, whereas when only 1 leaf is retrieved the
# parent is not returned.


# Reranker that keeps the 6 best nodes.
rerank = SentenceTransformerRerank(top_n=6, model="BAAI/bge-reranker-base")


# Query engine combining the auto-merging retriever with the reranker.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[rerank]
)

In [26]:
auto_merging_response = auto_merging_engine.query(
    """
    Highlight NVIDIA's performance in its Compute & Networking and Graphics segments. How did the revenue and operating income from these segments change compared to the previous year?
    """
)

In [27]:
from llama_index.core.response.notebook_utils import display_response

display_response(auto_merging_response)

**`Final Response:`** NVIDIA's Compute & Networking segment saw an increase in revenue and operating income compared to the previous year. The Graphics segment also experienced growth in revenue and operating income when compared to the previous year.

In [28]:
import os

from llama_index.core import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
    Document
)
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import get_leaf_nodes
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms.openai import OpenAI


def build_automerging_index(
    docs,
    llm,
    embed_model="local:BAAI/bge-small-en",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging (hierarchical) vector index, or reload it from disk.

    Args:
        docs: Documents handed to the hierarchical node parser.
        llm: LLM placed in the service context attached to the index.
        embed_model: Embedding model (spec) placed in the service context. When
            reloading, it should be the model the persisted index was built with.
        save_dir: Directory the index is persisted to and reloaded from.
        chunk_sizes: Chunk sizes of the node hierarchy; defaults to
            ``[2048, 512, 128]`` when falsy.

    Returns:
        The freshly built index when ``save_dir`` does not exist, otherwise the
        index loaded from ``save_dir``.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Fast path: an index is already persisted, so skip parsing the documents
    # and filling a docstore that would be thrown away.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(docs)
    leaf_nodes = get_leaf_nodes(nodes)

    # All nodes go into the docstore; only the leaf nodes are passed to the
    # vector index.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Create a query engine that uses auto-merging retrieval plus reranking.

    Args:
        automerging_index: Index providing ``as_retriever`` and ``storage_context``.
        similarity_top_k: Number of nodes requested from the base retriever.
        rerank_top_n: ``top_n`` given to the BAAI/bge-reranker-base reranker.

    Returns:
        A ``RetrieverQueryEngine`` built from the auto-merging retriever, with
        the reranker as its only node postprocessor.
    """
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        model="BAAI/bge-reranker-base",
        top_n=rerank_top_n,
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
    )

# "./merging_index" is the directory an earlier cell persisted with the
# "local:BAAI/bge-small-zh-v1.5" embedding model. Queries against a reloaded
# index must be embedded with the same model, so it is passed explicitly here
# instead of relying on the helper's different default.
index = build_automerging_index(
    docs,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-zh-v1.5",
    save_dir="./merging_index",
)

query_engine = get_automerging_query_engine(index, similarity_top_k=6)

# Query the engine that was just built. Previously this called
# `auto_merging_engine`, a leftover from an earlier cell, so the helper
# functions above were never actually exercised.
auto_merging_response = query_engine.query(
   """
   Examine the growth in NVIDIA's key market platforms: Data Center, Gaming, Professional Visualization, and Automotive. Which platform experienced the highest growth, and what factors contributed to this growth?
    """
)

display_response(auto_merging_response)

  merging_context = ServiceContext.from_defaults(


**`Final Response:`** The Gaming platform experienced the highest growth among NVIDIA's key market platforms. This growth can be attributed to factors such as increased demand for gaming products, advancements in gaming technology, and strategic partnerships within the gaming industry.

In [26]:
# ! pip install trulens_eval



In [27]:
# ! pip uninstall packaging

Found existing installation: packaging 23.2
Uninstalling packaging-23.2:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/packaging-23.2.dist-info/*
    /usr/local/lib/python3.10/dist-packages/packaging/*
Proceed (Y/n)? [31mERROR: Operation cancelled by user[0m[31m
[0m

In [28]:
# ! pip install 'packaging==23.2'

[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
import packaging
print(packaging.__version__)

In [30]:
# !pip install 'langchain>=0.0.354'



In [31]:
import trulens_eval

	(protobuf 3.20.3 (/usr/local/lib/python3.10/dist-packages), Requirement.parse('protobuf>=4.23.2'))

This package is optional for trulens_eval so this may not be a problem but if
you need to use the related optional features and find there are errors, you
will need to resolve the conflict:

    ```bash
    pip install 'protobuf>=4.23.2'
    ```

If you are running trulens_eval in a notebook, you may need to restart the
kernel after resolving the conflict. If your distribution is in a bad place
beyond this package, you may need to reinstall trulens_eval so that all of the
dependencies get installed and hopefully corrected:
    
    ```bash
    pip uninstall -y trulens_eval
    pip install trulens_eval
    ```

	(scikit-learn 1.2.2 (/usr/local/lib/python3.10/dist-packages), Requirement.parse('scikit-learn>=1.3.1'))

This package is optional for trulens_eval so this may not be a problem but if
you need to use the related optional features and find there are errors, you
will need to resolv

In [43]:
from trulens_eval import Tru
from trulens_eval import Feedback,TruLlama
from trulens_eval import OpenAI as fOpenAI
from trulens_eval.feedback import Groundedness
import numpy as np


import nest_asyncio

#初始化评估数据库
Tru().reset_database()

#设置线程的并发执行
nest_asyncio.apply()

In [44]:
#定义问题
eval_questions = [
                 "Highlight NVIDIA's performance in its Compute & Networking and Graphics segments. How did the revenue and operating income from these segments change compared to the previous year?",
                 "Examine the growth in NVIDIA's key market platforms: Data Center, Gaming, Professional Visualization, and Automotive. Which platform experienced the highest growth, and what factors contributed to this growth?",
                 "Describe the NVIDIA Hopper GPU architecture's role and significance in fiscal year 2023. What were the first products based on this architecture?",
]

In [45]:
#创建评估器对象
tru = Tru()

# Define the evaluation recorder.
def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap a query engine in a TruLlama recorder with three feedback functions.

    The feedbacks are "Answer Relevance" (input vs. output), "Context Relevance"
    (input vs. each source node text, averaged with ``np.mean``) and
    "Groundedness" (source node texts vs. output).

    Args:
        query_engine: Query engine to be recorded.
        app_id: Identifier under which the records are stored.

    Returns:
        The configured ``TruLlama`` recorder.
    """
    provider = fOpenAI()

    # Relevance of the final answer to the question.
    answer_relevance = Feedback(
        provider.relevance_with_cot_reasons, name="Answer Relevance"
    ).on_input_output()

    # Relevance of each retrieved source node to the question, averaged.
    context_feedback = Feedback(
        provider.relevance_with_cot_reasons, name = "Context Relevance"
    )
    context_feedback = context_feedback.on_input()
    context_feedback = context_feedback.on(TruLlama.select_source_nodes().node.text)
    context_relevance = context_feedback.aggregate(np.mean)

    # Groundedness of the answer in the retrieved source nodes.
    grounded = Groundedness(groundedness_provider=provider)
    grounded_feedback = Feedback(
        grounded.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    grounded_feedback = grounded_feedback.on(TruLlama.select_source_nodes().node.text)
    grounded_feedback = grounded_feedback.on_output()
    groundedness = grounded_feedback.aggregate(grounded.grounded_statements_aggregator)

    return TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=[answer_relevance, context_relevance, groundedness],
    )

# Define the function that runs the evaluation.
def run_evals(eval_questions, tru_recorder, query_engine):
    """Send every evaluation question to the engine inside the recorder context.

    Args:
        eval_questions: Iterable of question strings.
        tru_recorder: Context manager entered once per question.
        query_engine: Object whose ``query`` method is called with each question.

    Returns:
        None; the responses are not kept.
    """
    for prompt in eval_questions:
        # One recording context per question.
        with tru_recorder:
            query_engine.query(prompt)



In [46]:
service_context=ServiceContext.from_defaults(
    chunk_size=1024,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model=embed_model
)
index=VectorStoreIndex.from_documents(docs,service_context=service_context)
basic_query_engine=index.as_query_engine()

tru_recorder = get_prebuilt_trulens_recorder(
    basic_query_engine,
    app_id ='Basic_RAG'
)

# #执行评估
run_evals(eval_questions, tru_recorder, basic_query_engine)

  service_context=ServiceContext.from_defaults(


✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

In [47]:
# sentence window RAG

sentence_index = build_sentence_window(
    docs,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./sentence_index_1",
)
sentence_window_query_engine = get_sentence_window_query_engine(sentence_index, similarity_top_k=6)

# 创建记录器
tru_recorder = get_prebuilt_trulens_recorder(
    sentence_window_query_engine,
    app_id ='sentence_window'
)

# 执行评估
run_evals(eval_questions, tru_recorder, sentence_window_query_engine)

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

  sentence_context = ServiceContext.from_defaults(


Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

In [48]:
Tru().run_dashboard()

<Popen: returncode: 0 args: ['streamlit', 'run', '--server.headless=True', '...>

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Submit this IP Address: 34.16.152.172



In [49]:
Tru().get_leaderboard(app_ids=[])


Unnamed: 0_level_0,Groundedness,Answer Relevance,Context Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic_RAG,1.0,1.0,0.816667,3.0,0.002234
sentence_window,0.333333,0.95,0.375,3.0,0.002879


In [52]:
auto_merging_index = build_automerging_index(
    docs,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="./merging_index_1",
    chunk_sizes=[2048,512,128],
)

auto_merging_engine = get_automerging_query_engine(
    auto_merging_index,
    similarity_top_k=12,
    rerank_top_n=6,
)

tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine,
    app_id ='auto_merging'
)

run_evals(eval_questions, tru_recorder, auto_merging_engine)
Tru().run_dashboard()


Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

  merging_context = ServiceContext.from_defaults(


Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Submit this IP Address: 34.16.152.172



<Popen: returncode: 0 args: ['streamlit', 'run', '--server.headless=True', '...>

In [53]:
Tru().get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Groundedness,Answer Relevance,Context Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic_RAG,1.0,1.0,0.816667,3.0,0.002234
auto_merging,0.644444,0.92,0.455556,4.5,0.00136
sentence_window,0.5,0.933333,0.583333,3.0,0.002879


# Agent


In [55]:
pip install -U llama-index llama-index-vector-stores-milvus pymilvus llama-index-llms-openai llama-index-readers-file

Collecting llama-index-vector-stores-milvus
  Downloading llama_index_vector_stores_milvus-0.1.6-py3-none-any.whl (5.4 kB)
Collecting pymilvus
  Downloading pymilvus-2.4.0-1-py3-none-any.whl (189 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.7/189.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting grpcio<=1.60.0,>=1.49.1 (from pymilvus)
  Downloading grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
Collecting environs<=9.5.0 (from pymilvus)
  Downloading environs-9.5.0-py2.py3-none-any.whl (12 kB)
Collecting ujson>=2.0.0 (from pymilvus)
  Downloading ujson-5.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.2/53.2 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting minio>=7.0.0 (from pymilvus)
  Downloading mini

In [58]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Load the documents found under each company's folder. The two lists are
# kept separate so that each company later gets its own index and tool.
nvda_docs = SimpleDirectoryReader(input_dir="/content/NVDA").load_data()
msft_docs = SimpleDirectoryReader(input_dir="/content/MSFT").load_data()

In [61]:
# Map each ticker to the documents loaded for it; the ticker list is taken
# from the mapping's keys (dicts preserve insertion order), so it stays
# ['nvda', 'msft'].
stock_docs = {
    'nvda': nvda_docs,
    'msft': msft_docs,
}
stocks = list(stock_docs)

In [62]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.node_parser import SentenceSplitter
import os
from llama_index.core.callbacks import CallbackManager

# NOTE(review): this rebinds `node_parser`, which an earlier cell bound to a
# SentenceWindowNodeParser; re-running earlier cells afterwards will see this
# SentenceSplitter instead.
node_parser = SentenceSplitter()

# Build one QueryEngineTool per ticker. Each tool wraps a vector index over
# that company's documents. `llm` is expected to be defined by an earlier cell.
query_engine_tools = []
callback_manager = CallbackManager([])

for stock in stocks:
    persist_dir = f"./data/{stock}"

    if not os.path.exists(persist_dir):
        # No persisted index yet: chunk the documents, build the vector index
        # and save it so later runs can skip this work.
        nodes = node_parser.get_nodes_from_documents(stock_docs[stock])
        vector_index = VectorStoreIndex(
            nodes, callback_manager=callback_manager
        )
        vector_index.storage_context.persist(persist_dir=persist_dir)
    else:
        # Reuse the index persisted by a previous run; the documents do not
        # need to be re-chunked in this case.
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir),
            callback_manager=callback_manager,
        )

    # define query engines
    vector_query_engine = vector_index.as_query_engine(llm=llm)

    # define tools
    query_engine_tools.append(
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name=f"vector_tool_{stock}",
                # Must be an f-string: as a plain string the description
                # contained the literal text "{stock}", so every tool had the
                # same description and none of them named its company.
                description=(
                    f"""
                    Provides detail information about {stock} financials for year 2023.
                    Use a detailed plain text question as input to the tool.
                    """
                ),
            ),
        )
    )

In [63]:
from llama_index.core.agent import AgentRunner
from llama_index.agent.openai import OpenAIAgent, OpenAIAgentWorker

# Create the step-wise OpenAI worker over the per-company query tools and
# wrap it in an AgentRunner, which is the object the chat cells talk to.
# `query_engine_tools` and `llm` come from earlier cells.
openai_step_engine = OpenAIAgentWorker.from_tools(
    query_engine_tools, llm=llm, verbose=True
)
agent = AgentRunner(openai_step_engine)

In [70]:
# Ask the agent a question that does not name a company; the final answer is
# kept in `response` for the next cell.
revenue_question = "What is the company revenue in 2023?"
response = agent.chat(revenue_question)


Added user message to memory: What is the company revenue in 2023?
=== Calling Function ===
Calling function: vector_tool_nvda with args: {"input": "What is the company revenue in 2023?"}
Got output: The company revenue in 2023 was $26.97 billion.

=== Calling Function ===
Calling function: vector_tool_msft with args: {"input": "What is the company revenue in 2023?"}
Got output: The company revenue in 2023 was $211.9 billion.



In [71]:
# Show the agent's final answer as text (print() applies str() itself).
print(response)

The revenue for the two companies in 2023 was as follows:
- Company A: The company revenue in 2023 was $26.97 billion.
- Company B: The company revenue in 2023 was $211.9 billion.


In [72]:
# list the task and steps for visibility
tasks = agent.list_tasks()
print(f"Task ID: {tasks[-1].task.task_id}")
completed_steps = agent.get_completed_steps(tasks[-1].task.task_id)
print(f"Number of steps: {len(completed_steps)}")

Task ID: 22f1b9b2-1e4d-486e-9dc3-b942b8d94c70
Number of steps: 2


In [73]:
# Second question in the same chat session; `response` is overwritten with
# the new answer.
highlight_question = "What is the company's highlight in 2023?"
response = agent.chat(highlight_question)

Added user message to memory: What is the company's highlight in 2023?
=== Calling Function ===
Calling function: vector_tool_nvda with args: {"input": "What is the highlight of the company in 2023?"}
Got output: The highlight of the company in 2023 was the introduction of the NVIDIA Hopper GPU architecture and the ramp of the first products based on this architecture, including the NVIDIA H100 Tensor Core GPU.

=== Calling Function ===
Calling function: vector_tool_msft with args: {"input": "What is the highlight of the company in 2023?"}
Got output: The highlight of the company in 2023 is the commitment to addressing racial injustice and inequity, as outlined in the Racial Equity Initiative. This initiative focuses on strengthening communities, engaging the ecosystem, and increasing representation and inclusion, with specific actions and progress expected to be achieved or exceeded by 2025.



In [74]:
# Show the agent's final answer as text (print() applies str() itself).
print(response)

The highlights for the two companies in 2023 were as follows:
- Company A: The highlight of the company in 2023 was the introduction of the NVIDIA Hopper GPU architecture and the ramp of the first products based on this architecture, including the NVIDIA H100 Tensor Core GPU.
- Company B: The highlight of the company in 2023 is the commitment to addressing racial injustice and inequity, as outlined in the Racial Equity Initiative. This initiative focuses on strengthening communities, engaging the ecosystem, and increasing representation and inclusion, with specific actions and progress expected to be achieved or exceeded by 2025.
