# OpenAI Agent with LlamaIndex

## Install Dependencies

In [37]:
# !pip install uv
# !uv pip install --system -qU llama-index==0.11.6 llama-index-llms-openai llama-index-readers-file llama-index-embeddings-openai llama-index-llms-openai-like "openinference-instrumentation-llama-index>=2" arize-phoenix python-dotenv

In [38]:
# !pip install flask twilio
!pip install pyngrok


I0000 00:00:1731792304.028409     669 fork_posix.cc:75] Other threads are currently calling into gRPC, skipping fork() handlers


Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


## Setup API Keys


In [10]:
from os import environ
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = environ["OPENAI_API_KEY"]

In [40]:
from os import environ
from dotenv import load_dotenv

load_dotenv()


# Your Twilio credentials
TWILIO_ACCOUNT_SID = environ["TWILIO_ACCOUNT_SID"]
TWILIO_AUTH_TOKEN = environ["TWILIO_AUTH_TOKEN"]


In [41]:
from flask import Flask, request
from twilio.twiml.voice_response import VoiceResponse
import threading
from werkzeug.serving import make_server

app = Flask(__name__)

class ServerThread(threading.Thread):
    def __init__(self, app):
        threading.Thread.__init__(self)
        self.server = make_server('127.0.0.1', 5000, app)
        self.ctx = app.app_context()
        self.ctx.push()

    def run(self):
        self.server.serve_forever()

    def shutdown(self):
        self.server.shutdown()

In [44]:
# Global variable to store server thread
server_thread = None

def start_server():
    global server_thread
    if server_thread is None:
        server_thread = ServerThread(app)
        server_thread.start()
        print("Server started!")

def stop_server():
    global server_thread
    if server_thread is not None:
        server_thread.shutdown()
        server_thread = None
        print("Server stopped!")

# Start the server
start_server()

Server started!


127.0.0.1 - - [16/Nov/2024 21:32:34] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [16/Nov/2024 21:32:34] "GET /favicon.ico HTTP/1.1" 404 -


In [None]:
# Make a test call
call = client.calls.create(
    to="+15108474160",  # Your number from earlier
    from_=environ["+18337041925"],
    url="http://127.0.0.1:5000/answer"
)
print(f"Call initiated with SID: {call.sid}")

KeyError: '+18337041925'

: 

## Import libraries and setup LlamaIndex

In [43]:
@app.route("/answer", methods=['POST'])
def answer_call():
    response = VoiceResponse()
    spoken_text = request.values.get('SpeechResult', '')
    
    if not spoken_text:
        gather = response.gather(
            input='speech',
            action='/process_speech',
            timeout=3,
            language='en-US'
        )
        gather.say('Please ask your question about Rapamycin.')
    else:
        response.redirect('/process_speech')
    
    return str(response)

@app.route("/process_speech", methods=['POST'])
def process_speech():
    response = VoiceResponse()
    spoken_text = request.values.get('SpeechResult', '')
    
    try:
        agent_response = agent.chat(spoken_text)
        tts_text = str(agent_response)
        response.say(tts_text, voice='alice')
        
    except Exception as e:
        response.say("I'm sorry, there was an error processing your request.")
        print(f"Error: {str(e)}")
    
    return str(response)

@app.route("/call", methods=['POST'])
def make_call():
    to_number = request.form['to']
    
    try:
        call = client.calls.create(
            to=to_number,
            from_=environ["TWILIO_PHONE_NUMBER"],  # Make sure this is in your .env
            url=request.url_root + 'answer'
        )
        return f"Call initiated with SID: {call.sid}"
    except Exception as e:
        return f"Error making call: {str(e)}"

AssertionError: View function mapping is overwriting an existing endpoint function: answer_call

In [11]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI


# Create an llm object to use for the QueryEngine and the ReActAgent
llm = OpenAI(model="gpt-4")

# Set up Phoenix

In [19]:
import phoenix as px
session = px.launch_app()

Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [5]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

tracer_provider = register()
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



## Load Documents

In [20]:
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/sorge"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/fischer"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except:
    index_loaded = False

This is the point we create our vector indexes, by calculating the embedding vectors for each of the chunks. You only need to run this once.

In [21]:
if not index_loaded:
    # load data
    sorge_docs = SimpleDirectoryReader(
        input_files=["./10k/rapa_sorge_2014.pdf"]
    ).load_data()
    fischer_docs = SimpleDirectoryReader(
        input_files=["./10k/rapa_fischer_2015.pdf"]
    ).load_data()

    # build index
    sorge_index = VectorStoreIndex.from_documents(sorge_docs, show_progress=True)
    fischer_index = VectorStoreIndex.from_documents(fischer_docs, swow_progress=True)

    # persist index
    sorge_index.storage_context.persist(persist_dir="./storage/sorge")
    fischer_index.storage_context.persist(persist_dir="./storage/fischer")

Parsing nodes: 100%|██████████| 71/71 [00:00<00:00, 448.02it/s]
Generating embeddings: 100%|██████████| 71/71 [00:00<00:00, 84.89it/s]


Now create the query engines.

In [22]:
sorge_engine = sorge_index.as_query_engine(similarity_top_k=3, llm=llm)
fischer_engine = fischer_index.as_query_engine(similarity_top_k=3, llm=llm)

We can now define the query engines as tools that will be used by the agent.

As there is a query engine per document we need to also define one tool for each of them.

In [23]:
query_engine_tools = [
    QueryEngineTool(
        query_engine=sorge_engine,
        metadata=ToolMetadata(
            name="sorge_10k",
            description=(
                "Provides information about Rapamycin according to Sorge 2014 science paper. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=fischer_engine,
        metadata=ToolMetadata(
            name="fischer_10k",
            description=(
                "Provides information about Rapamycin according to Fischer 2015 science paper. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
]

## Creating the Agent
Now we have all the elements to create a LlamaIndex ReactAgent

In [24]:
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
    max_turns=10,
)

Now we can interact with the agent and ask a question.

In [26]:
response = agent.chat("What type of extrapolation can you make about Rapamycin from the two scientific articles?")
print(str(response))

> Running step 266f1145-e90f-4870-ab15-d59d53663e76. Step input: What type of extrapolation can you make about Rapamycin from the two scientific articles?
[1;3;38;5;200mThought: The user is asking for an extrapolation from the two scientific articles. I need to use both tools to gather information from the Sorge 2014 and Fischer 2015 papers.
Action: sorge_10k
Action Input: {'input': 'What information can be extrapolated about Rapamycin from the Sorge 2014 paper?'}
[0m[1;3;34mObservation: The Sorge 2014 paper investigates the effects of long-term rapamycin supplementation on a cancer model. The study found that there were no significant differences in ACF formation in the colon of both young and old mice when compared to the controls. The study also found that rapamycin works by downregulating the activity of mTORC1, as evidenced by significant differences in the levels of p-S6K and 4-EBP1, which are direct targets of mTOR phosphorylation. However, in old mice, there was an increase 

In [34]:
from twilio.rest import Client

In [36]:
call = client.calls.create(
  url="http://demo.twilio.com/docs/voice.xml",
  to="+15108474160",
  from_="+18337041925"
)

print(call.sid)

CAd19402b500b7bffe9a0e3dbbc35f1595


In [35]:
@app.route("/answer", methods=['POST'])
def answer_call():
    """Handle incoming calls and generate TwiML response"""
    # Create TwiML response object
    response = VoiceResponse()
    
    # Get the spoken text from the call
    spoken_text = request.values.get('SpeechResult', '')
    
    if not spoken_text:
        # If no speech detected, prompt the user to speak
        gather = response.gather(
            input='speech',
            action='/process_speech',
            timeout=3,
            language='en-US'
        )
        gather.say('Please ask your question about Rapamycin.')
    else:
        # Forward to processing endpoint
        response.redirect('/process_speech')
    
    return str(response)

@app.route("/process_speech", methods=['POST'])
def process_speech():
    """Process the speech and get response from agent"""
    # Create TwiML response object
    response = VoiceResponse()
    
    # Get the spoken text
    spoken_text = request.values.get('SpeechResult', '')
    
    try:
        # Get response from your agent
        agent_response = agent.chat(spoken_text)
        
        # Convert agent response to string and clean up if needed
        tts_text = str(agent_response)
        
        # Have Twilio speak the response
        response.say(tts_text, voice='alice')
        
    except Exception as e:
        # Handle any errors
        response.say("I'm sorry, there was an error processing your request.")
        print(f"Error: {str(e)}")
    
    return str(response)

@app.route("/call", methods=['POST'])
def make_call():
    """Initiate a call to a specified number"""
    to_number = request.form['to']
    
    try:
        call = client.calls.create(
            to=to_number,
            from_='your_twilio_phone_number',
            url=request.url_root + 'answer'
        )
        return f"Call initiated with SID: {call.sid}"
    except Exception as e:
        return f"Error making call: {str(e)}"

if __name__ == "__main__":
    app.run(debug=True)

AssertionError: View function mapping is overwriting an existing endpoint function: answer_call