# OpenAI Agent with LlamaIndex

## Install Dependencies

In [1]:
# Install the `uv` package manager with pip, then use `uv` to install the
# notebook's dependencies. llama-index is pinned to 0.11.6 and
# openinference-instrumentation-llama-index is constrained to >=2; the remaining
# packages are left unpinned.
# NOTE(review): `!` runs these commands in a shell, so they may target a different
# environment than the running kernel — confirm the packages land where the kernel looks.
!pip install uv
!uv pip install -qU xpander-sdk llama-index==0.11.6 llama-index-llms-openai llama-index-readers-file llama-index-embeddings-openai llama-index-llms-openai-like "openinference-instrumentation-llama-index>=2" arize-phoenix python-dotenv

Collecting uv
  Using cached uv-0.5.25-py3-none-macosx_11_0_arm64.whl.metadata (11 kB)
Using cached uv-0.5.25-py3-none-macosx_11_0_arm64.whl (14.4 MB)
Installing collected packages: uv
Successfully installed uv-0.5.25


## Setup API Keys


In [1]:
from os import environ
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before the keys are read.
load_dotenv()

# All three settings are mandatory: a missing variable raises KeyError right here
# instead of failing later inside an SDK call.
OPENAI_API_KEY, XPANDER_API_KEY, XPANDER_AGENT_ID = (
    environ[variable]
    for variable in ("OPENAI_API_KEY", "XPANDER_API_KEY", "XPANDER_AGENT_ID")
)

## Import libraries and setup LlamaIndex

In [7]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI


# Shared LLM handle (LlamaIndex's OpenAI wrapper, model "gpt-4o"). The same object
# is passed as `llm=` to both query engines and to the ReActAgent further down,
# so every component in this notebook talks to the same model.
llm = OpenAI(model="gpt-4o")

## Set up Phoenix

In [4]:
import phoenix as px
# Launch the Phoenix app and keep the returned handle in `session`.
# The recorded output below reports the UI at http://localhost:6006/.
session = px.launch_app()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [11]:
from xpander_sdk import XpanderClient, LLMProvider

# Connect to xpander and look up the agent configured through the environment.
xpander_client = XpanderClient(api_key=XPANDER_API_KEY, organization_id="")
xpander_agent = xpander_client.agents.get(agent_id=XPANDER_AGENT_ID)

# Start an execution so the agent has an input message to inspect.
xpander_agent.invoke_agent("Get only the longest readable tag")

# Tool definitions as returned for the OpenAI provider.
print(xpander_agent.get_tools(llm_provider=LLMProvider.OPEN_AI))

# Execution input plus the first instruction delegate's fields.
# NOTE(review): `_delegates` is a private attribute of the instructions object —
# confirm whether the SDK exposes a public accessor for these fields.
print(f"Input: {xpander_agent.execution.input_message.content}")
print(f"General: {xpander_agent.instructions._delegates[0].general}")
print(f"Goal: {xpander_agent.instructions._delegates[0].goal}")
print(f"Role: {xpander_agent.instructions._delegates[0].role}")

# Dump every message currently held in the agent's memory, one ruled section each.
print("-" * 100)
print("Messages object:")
for message in xpander_agent.memory.messages:
    print("-" * 100)
    print(f"Role: {message.role}")
    print(f"Tool Call ID: {message.tool_call_id}")
    print(f"Tool Calls: {message.tool_calls}")
    print(f"Content: {message.content}")
print("-" * 100)

[{'type': 'function', 'function': {'name': 'LinkedInProfileServiceConvertProfileUrlToEmail', 'description': "Attempts to retrieve email addresses associated with a given LinkedIn profile URL. This operation is valuable for obtaining contact information. If the profile URL isn't provided, run SearchProfilesByCriteria first to get the profile URL. Use this operation for lead generation or to establish direct contact with LinkedIn users when you have their profile URL but need their email address.", 'parameters': {'type': 'object', 'properties': {'bodyParams': {'type': 'object', 'properties': {}, 'required': []}, 'queryParams': {'type': 'object', 'properties': {'url': {'type': 'string', 'description': "LinkedIn profile URL (e.g., 'https://www.linkedin.com/in/taylorotwell')."}}, 'required': ['url']}, 'pathParams': {'type': 'object', 'properties': {}, 'required': []}}, 'required': ['bodyParams', 'queryParams', 'pathParams']}}}, {'type': 'function', 'function': {'name': 'LinkedInProfileServi

In [12]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

# Obtain a tracer provider from Phoenix. Per the recorded output below, `register`
# also installs it as the global OpenTelemetry default.
tracer_provider = register()
# Instrument LlamaIndex with that provider so its activity is traced.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



## Load Documents

In [13]:
# Try to reuse the vector indexes persisted by an earlier run. `index_loaded`
# tells the next cell whether it still has to build them from the PDFs.
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/lyft"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/uber"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except Exception as load_error:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate. Any ordinary failure (e.g. nothing persisted yet)
    # falls back to rebuilding, and the reason is shown instead of being swallowed.
    print(f"Could not load persisted indexes ({load_error!r}); they will be rebuilt.")
    index_loaded = False

This is where we create the vector indexes by computing an embedding vector for each chunk. You only need to run this once: the indexes are persisted under `./storage` and reloaded on later runs.

In [14]:
# Build the indexes from the source PDFs only when the previous cell could not
# load persisted copies.
if not index_loaded:
    # Load the two 10-K filings.
    lyft_docs = SimpleDirectoryReader(
        input_files=["./10k/lyft_2021.pdf"]
    ).load_data()
    uber_docs = SimpleDirectoryReader(
        input_files=["./10k/uber_2021.pdf"]
    ).load_data()

    # Build one vector index per filing. Both calls now pass `show_progress`;
    # the Uber call previously misspelled it as `swow_progress`, so its
    # progress bar was never enabled.
    lyft_index = VectorStoreIndex.from_documents(lyft_docs, show_progress=True)
    uber_index = VectorStoreIndex.from_documents(uber_docs, show_progress=True)

    # Persist to the same directories the loading cell reads from.
    lyft_index.storage_context.persist(persist_dir="./storage/lyft")
    uber_index.storage_context.persist(persist_dir="./storage/uber")

Now create the query engines.

In [15]:
# One query engine per index, built with identical settings (similarity_top_k=3
# and the shared `llm`); Lyft is created first, then Uber.
lyft_engine, uber_engine = (
    index.as_query_engine(similarity_top_k=3, llm=llm)
    for index in (lyft_index, uber_index)
)

We can now define the query engines as tools that will be used by the agent.

Because there is one query engine per document, we also need to define one tool for each engine.

In [16]:
# Wrap each query engine in a tool for the agent. The tool name is
# "<company>_10k" in lower case; the description names the company and asks for
# a detailed plain-text question as input.
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name=f"{company.lower()}_10k",
            description=(
                f"Provides information about {company} financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    )
    for company, engine in (("Lyft", lyft_engine), ("Uber", uber_engine))
]

## Creating the Agent
Now we have all the elements needed to create a LlamaIndex `ReActAgent`.

In [17]:
# Build the ReAct agent over the two 10-K tools, sharing the notebook's `llm`.
# `verbose=True` produces the step-by-step Thought/Action/Observation trace
# shown in the output of the chat cell.
llama_agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
    # The reasoning-loop cap is named `max_iterations`; the previous `max_turns`
    # is not a `from_tools` parameter and was not applied as a limit.
    max_iterations=10,
)

Now we can interact with the agent and ask a question.

In [18]:
# Put the comparison question to the agent and show its final answer.
# print() already applies str() to its argument, so no explicit conversion is needed.
response = llama_agent.chat("Who had more profit in 2021, Lyft or Uber?")
print(response)

> Running step 2babc6ce-b862-4264-8daa-878eda24fe99. Step input: Who had more profit in 2021, Lyft or Uber?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: lyft_10k
Action Input: {'input': "What was Lyft's profit in 2021?"}
[0m[1;3;34mObservation: Lyft did not report a profit in 2021; instead, it reported a net loss of $1.0 billion for the year.
[0m> Running step 7a172e85-1c8e-4918-829f-409ddb9a0e74. Step input: None
[1;3;38;5;200mThought: I have the information about Lyft's financial performance in 2021. Now, I need to find out Uber's profit for the same year to compare.
Action: uber_10k
Action Input: {'input': "What was Uber's profit in 2021?"}
[0m[1;3;34mObservation: Uber did not make a profit in 2021. The company reported a net loss attributable to Uber Technologies, Inc. of $496 million for that year.
[0m> Running step 898c4ca2-d33f-46a7-bd97-b045db79d5af. Step input: None
[1;3;38;5;200mThou

In [2]:
from os import environ
from dotenv import load_dotenv

# Load credentials again so this cell can also run on its own in a fresh kernel.
load_dotenv()

OPENAI_API_KEY = environ["OPENAI_API_KEY"]
XPANDER_API_KEY = environ["XPANDER_API_KEY"]
XPANDER_AGENT_ID = environ["XPANDER_AGENT_ID"]

# NOTE(review): this rebinds the name `OpenAI`, which an earlier cell imported from
# `llama_index.llms.openai`. Re-running that earlier `llm = OpenAI(...)` cell after
# this one would pick up the raw OpenAI client class instead.
from openai import OpenAI
from xpander_sdk import XpanderClient, LLMProvider

# Load the xpander client, fetch the agent, and create the OpenAI client.
xpander_client = XpanderClient(api_key=XPANDER_API_KEY, organization_id="")
xpander_agent = xpander_client.agents.get(agent_id=XPANDER_AGENT_ID)
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Create the execution and seed the agent memory with its input and instructions.
xpander_agent.invoke_agent("Who had more profit in 2021, Lyft or Uber?")
xpander_agent.memory.initialize(
    input=xpander_agent.execution.input_message,
    instructions=xpander_agent.instructions,
)

# Drive the agent: ask the model, record its reply, run the requested tools,
# and repeat until the agent reports it is finished.
while not xpander_agent.is_finished():
    # Fetch the tool list once per iteration so the list that is printed is
    # exactly the list sent to the model (it was previously fetched twice).
    tools = xpander_agent.get_tools(llm_provider=LLMProvider.OPEN_AI)

    print("-"*100)
    print("Messages:",xpander_agent.memory.messages)
    print("Tools:",tools)
    print("-"*100)

    response = openai_client.chat.completions.create(
        model='gpt-4o',
        messages=xpander_agent.memory.retrieve_messages(),
        tools=tools,
        tool_choice="auto",
        temperature=0.0,
    )

    # Serialize the response once and reuse it for both SDK calls below.
    llm_response = response.model_dump()

    # Add messages directly from the LLM response.
    xpander_agent.memory.add_messages(llm_response)

    # Extract the tool calls from the LLM response and run them.
    tool_calls = XpanderClient.extract_tool_calls(
        llm_response=llm_response,
        llm_provider=LLMProvider.OPEN_AI,
    )
    xpander_agent.run_tools(tool_calls=tool_calls)

# Re-fetch the execution to read its final status and result.
# IMPORTANT: the last tool is xpfinish-agent-execution-finished, which ends the
# agent run with parsing and may be slower due to inference times.
execution_result = xpander_agent.execution.fetch(agent=xpander_agent,execution_id=xpander_agent.execution.id)
print("status", execution_result.status)
print("result", execution_result.result)

----------------------------------------------------------------------------------------------------
Messages: [<jsii._reference_map.InterfaceDynamicProxy object at 0x1059c0e10>, <jsii._reference_map.InterfaceDynamicProxy object at 0x106fbfa70>]
Tools: [{'type': 'function', 'function': {'name': 'LinkedInProfileServiceConvertProfileUrlToEmail', 'description': "Attempts to retrieve email addresses associated with a given LinkedIn profile URL. This operation is valuable for obtaining contact information. If the profile URL isn't provided, run SearchProfilesByCriteria first to get the profile URL. Use this operation for lead generation or to establish direct contact with LinkedIn users when you have their profile URL but need their email address.", 'parameters': {'type': 'object', 'properties': {'bodyParams': {'type': 'object', 'properties': {}, 'required': []}, 'queryParams': {'type': 'object', 'properties': {'url': {'type': 'string', 'description': "LinkedIn profile URL (e.g., 'https://w