## LlamaIndex Agents + Custom Evaluations

In [1]:
# Setup OpenAI Agent
from llama_index.agent import OpenAIAgent
import openai
# '...' is presumably a redacted placeholder: substitute a real OpenAI key before running.
# NOTE(review): a real key should not be committed inside the notebook; prefer
# loading it from the environment or prompting for it.
openai.api_key = '...'

In [2]:
import os

# Publish both API keys as environment variables in a single call.
# The "..." values are placeholders to be replaced with real credentials.
# NOTE(review): presumably read by the client libraries used further down — confirm.
os.environ.update(
    {
        "OPENAI_API_KEY": "...",
        "HUGGINGFACE_API_KEY": "...",
    }
)

In [3]:
# Import and initialize our tool spec
from llama_hub.tools.yelp.base import YelpToolSpec
from llama_index.tools.tool_spec.load_and_search.base import LoadAndSearchToolSpec

# '...' values are presumably redacted placeholders for the Yelp credentials;
# fill them in (ideally from the environment) before running.
tool_spec = YelpToolSpec(api_key='...', client_id='...')

In [4]:
def llm_standalone(prompt):
    """Answer `prompt` with a single gpt-3.5-turbo chat completion, no tools involved.

    The prompt is sent as the user message after a fixed system instruction;
    the text content of the first returned choice is handed back.
    """
    conversation = [
        {"role": "system", "content": "You are a question and answer bot, and you answer concisely."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

In [5]:
# Baseline: ask the plain chat model, which has no tools attached.
llm_standalone("what are good restaurants in toronto?")

'Some popular restaurants in Toronto include Canoe, Richmond Station, Alo, Pai, Buca, and Nora Gray.'

In [6]:
# Build the agent: each of the first two Yelp tools is wrapped in a
# LoadAndSearchToolSpec, and the combined tool list is given to the agent.
tools = tool_spec.to_tool_list()
load_and_search_tools = [
    wrapped_tool
    for yelp_tool in (tools[0], tools[1])
    for wrapped_tool in LoadAndSearchToolSpec.from_defaults(yelp_tool).to_tool_list()
]
agent = OpenAIAgent.from_tools(load_and_search_tools, verbose=True)

In [7]:
# The same restaurant question, this time routed through the Yelp-backed agent.
agent.chat("what are good restaurants in toronto")

=== Calling Function ===
Calling function: business_search with args: {
  "location": "Toronto",
  "term": "restaurants"
}
Got output: Content loaded! You can now search the information using read_business_search
=== Calling Function ===
Calling function: read_business_search with args: {
  "query": "good restaurants in Toronto"
}
Got output: 
Mira, Pai Northern Thai Kitchen, Richmond Station, Rasa, Katsuya


AgentChatResponse(response='Here are some good restaurants in Toronto:\n\n1. Mira\n2. Pai Northern Thai Kitchen\n3. Richmond Station\n4. Rasa\n5. Katsuya\n\nThese restaurants offer a variety of cuisines and have received positive reviews. Enjoy your dining experience in Toronto!', sources=[ToolOutput(content='Content loaded! You can now search the information using read_business_search', tool_name='business_search', raw_input={'args': (), 'kwargs': {'location': 'Toronto', 'term': 'restaurants'}}, raw_output='Content loaded! You can now search the information using read_business_search'), ToolOutput(content='\nMira, Pai Northern Thai Kitchen, Richmond Station, Rasa, Katsuya', tool_name='read_business_search', raw_input={'args': (), 'kwargs': {'query': 'good restaurants in Toronto'}}, raw_output='\nMira, Pai Northern Thai Kitchen, Richmond Station, Rasa, Katsuya')])

In [8]:
from trulens_eval import Feedback, OpenAI, Tru, TruBasicApp, TruLlama

# Handle used further down to launch the TruLens dashboard.
tru = Tru()

class OpenAI_custom(OpenAI):
    """OpenAI feedback provider extended with a custom "definitiveness" score."""

    def definitive(self, response: str) -> float:
        """Rate how definitive `response` is, as a score in [0.0, 1.0].

        gpt-3.5-turbo is asked for a rating on a 1-10 scale; the rating is
        divided by 10 to produce the returned score.

        Args:
            response: The text to be rated.

        Returns:
            The model's rating divided by 10, clamped to the range [0.0, 1.0].

        Raises:
            ValueError: If the model's reply contains no number at all.
        """
        import re  # local import keeps this cell runnable on its own

        rating_text = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Your job is to rate how definitive the following text is on a scale of 1 to 10. Respond with the number only."},
                {"role": "user", "content": response}
            ]
        )["choices"][0]["message"]["content"]

        # The model does not always obey "number only" (e.g. "8/10" or
        # "Rating: 7"), so take the first number in the reply rather than
        # passing the whole reply to float().
        number = re.search(r"-?\d+(?:\.\d+)?", rating_text)
        if number is None:
            raise ValueError(
                f"Expected a 1-10 rating from the model, got: {rating_text!r}"
            )

        # Clamp so an out-of-range rating cannot produce a score outside [0, 1].
        return min(max(float(number.group()) / 10, 0.0), 1.0)

custom = OpenAI_custom()
# .on_output() feeds the app's main output into the `response` argument of
# custom.definitive (confirmed by the message printed below this cell).
definitive = Feedback(custom.definitive).on_output()

✅ In definitive, input response will be set to *.__record__.main_output or `Select.RecordOutput` .


In [9]:
# Launch the TruLens dashboard; per the output below it runs as a separate
# Streamlit process and prints the URL it is served on.
tru.run_dashboard()

Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.4.23:8502 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [10]:
# Wrap the plain chat-completion function as a TruLens app with the
# `definitive` feedback attached.
standalone_app = TruBasicApp(llm_standalone, app_id="OpenAIChatCompletion", feedbacks=[definitive])

✅ app OpenAIChatCompletion -> default.sqlite
✅ feedback def. feedback_definition_hash_67c1be2d40f9350281a8eae3a1fed572 -> default.sqlite


In [11]:
# Wrap the Yelp agent as a TruLens app too, reusing the same `definitive`
# feedback so both apps are scored by the same function.
yelp_app = TruLlama(agent,
    app_id='YelpAgent',
    feedbacks=[definitive])

✅ app YelpAgent -> default.sqlite
✅ feedback def. feedback_definition_hash_67c1be2d40f9350281a8eae3a1fed572 -> default.sqlite


In [23]:
# Ask the plain-LLM app a factual question; as the output shows, the call
# returns the answer together with its Record.
standalone_app.call_with_record("Does orphan andy's serve breakfast?")

("No, Orphan Andy's does not serve breakfast.",
 Record(record_id='record_hash_56cd2d7fb4ee92766460c2c811d60a81', app_id='OpenAIChatCompletion', cost=Cost(n_requests=1, n_successful_requests=1, n_classes=0, n_tokens=45, n_prompt_tokens=34, n_completion_tokens=11, cost=7.300000000000001e-05), perf=Perf(start_time=datetime.datetime(2023, 7, 21, 8, 16, 9, 961327), end_time=datetime.datetime(2023, 7, 21, 8, 16, 10, 989521)), ts=datetime.datetime(2023, 7, 21, 8, 16, 10, 989549), tags='-', main_input="Does orphan andy's serve breakfast?", main_output="No, Orphan Andy's does not serve breakfast.", main_error='None', calls=[RecordAppCall(stack=(RecordAppCallMethod(path=JSONPath().app, method=Method(obj=Obj(cls=trulens_eval.tru_basic_app.TruWrapperApp, id=5389436560), name='<lambda>')),), args={'args': ["Does orphan andy's serve breakfast?"]}, rets="No, Orphan Andy's does not serve breakfast.", error=None, perf=Perf(start_time=datetime.datetime(2023, 7, 21, 8, 16, 9, 997289), end_time=datetime.

In [24]:
# Ask the Yelp agent the identical question through its TruLens wrapper, so
# the two answers can be compared.
yelp_app.query("Does orphan andy's serve breakfast?")

✅ record record_hash_56cd2d7fb4ee92766460c2c811d60a81 from OpenAIChatCompletion -> default.sqlite
✅ feedback feedback_result_hash_d95e787617fa4d8b1d3c2afa4e688022 on record_hash_56cd2d7fb4ee92766460c2c811d60a81 -> default.sqlite
=== Calling Function ===
Calling function: business_search with args: {
  "location": "Orphan Andy's",
  "term": "breakfast"
}
Got output: Content loaded! You can now search the information using read_business_search
=== Calling Function ===
Calling function: read_business_search with args: {
  "query": "Does Orphan Andy's serve breakfast?"
}
Got output: 
Yes, Orphan Andy's serves breakfast. This is indicated by the 'categories' field, which includes 'Breakfast & Brunch'.


Response(response="Yes, Orphan Andy's serves breakfast. They offer a variety of breakfast options as part of their menu.", source_nodes=[], metadata=None)

✅ record record_hash_3801a3851a89fff93e0d9f6071c98e7c from YelpAgent -> default.sqlite
✅ feedback feedback_result_hash_01ce04ac05a0afe053d9f85dc346eda9 on record_hash_3801a3851a89fff93e0d9f6071c98e7c -> default.sqlite
