## Eval existing runs

This notebook demonstrates how to use TruLens without a live app, working only from the logged results of some app.

In [None]:
# Setup env and keys. This is currently set up for running from github repo.

%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a "trulens" entry.
# Searching the finite list of ancestors guarantees termination; repeatedly
# stepping to `.parent` in a `while` loop never ends once the filesystem root
# is reached, because the root is its own parent.
start_dir = Path.cwd()
base = next(
    (
        candidate
        for candidate in (start_dir, *start_dir.parents)
        if (candidate / "trulens").exists()
    ),
    None,
)

if base is None:
    # Not running from inside a checkout: leave sys.path untouched and rely on
    # whatever trulens installation is importable. `base` stays a valid Path.
    base = start_dir
    print(
        f"No 'trulens' directory found at or above {start_dir}; "
        "sys.path left unchanged."
    )
else:
    print(base)

    # If running from github repo, can use this:
    sys.path.append(str(base))

In [None]:
from trulens.core.utils.keys import check_keys

# Check that the OpenAI API key needed by the feedback provider used later in
# this notebook is configured.
check_keys("OPENAI_API_KEY")

In [None]:
from trulens.core import TruSession

# Create the TruLens session; it is passed to the dashboard below.
tru = TruSession()

# Optional: reset the database behind this session (presumably discarding
# anything logged previously -- skip this call to keep existing contents).
tru.reset_database()  # if needed

In [None]:
from trulens.dashboard import run_dashboard

# Start the dashboard for this session; it can be used to browse the records
# and feedback results produced later in this notebook.
run_dashboard(tru)

In [None]:
from trulens.apps.virtual import VirtualApp
from trulens.core import Select

# Describe the virtual app. Any information you like can be stored here, either
# as a plain dictionary or as a `VirtualApp`, and handed to `TruVirtual` later
# on: an index of components, version details, or anything else. Feedback
# functions can refer to these values.

virtual_app = {
    "llm": {"modelname": "some llm component model name"},
    "template": "information about the template I used in my app",
    "debug": "all of these fields are completely optional",
}

# (Optional) Wrapping the dictionary in the `VirtualApp` class makes it
# possible to position the virtual app components and their properties with
# selectors.

virtual_app = VirtualApp(virtual_app)  # seeded with the dictionary above
virtual_app[Select.RecordCalls.llm.maxtokens] = 1024

# Selectors are also what feedback functions are defined with (later in the
# notebook), so the same ones can be reused here. `retriever_component`,
# defined below, is used to place information about retrieved context in a
# virtual record at the position that matches the retriever component of the
# virtual app. This is not required, but laying out the virtual app and the
# virtual records in a mirrored fashion, as they would be for a real app, may
# aid interpretability.

retriever_component = Select.RecordCalls.retriever
virtual_app[retriever_component] = "this is the retriever component"

In [None]:
# Display the virtual app layout (a bare expression on the last line of a
# notebook cell is rendered as that cell's output):
virtual_app

In [None]:
# Data. Records can be added to the database either by building a `Record`
# yourself or through the `VirtualRecord` class, which helps construct records
# for virtual models. `VirtualRecord` takes the same arguments as `Record`,
# except that calls are specified using selectors. The two example records
# below each contain the inputs and outputs of some context retrieval
# component. Information you do not wish to track or evaluate can simply be
# left out. The selectors refer to methods that feedback functions can select
# on, as shown further below.

from trulens.apps.virtual import VirtualRecord

# Selector for the `get_context` call of a presumed context retrieval
# component. The names are arbitrary but may help readability on your end.
context_method = retriever_component.get_context

rec1 = VirtualRecord(
    main_input="Where is Germany?",
    main_output="Germany is in Europe",
    calls={
        context_method: {
            "args": ["Where is Germany?"],
            "rets": ["Germany is a country located in Europe."],
        }
    },
)

# Mapping a method selector to a list of dicts, rather than a single dict,
# records several invocations of that same method:

rec2 = VirtualRecord(
    main_input="Where is Germany?",
    main_output="Poland is in Europe",
    calls={
        context_method: [
            {
                "args": ["Where is Germany?"],
                "rets": ["Poland is a country located in Europe."],
            },
            {
                "args": ["Where is Germany?"],
                "rets": ["Germany is a country located in Europe."],
            },
        ]
    },
)

data = [rec1, rec2]

# Run to read more about VirtualRecord:
# help(VirtualRecord)

In [None]:
# The same feedback functions as the LangChain quickstart except the selector
# for context is different.

from trulens.core import Feedback
from trulens.providers.openai import OpenAI

# Initialize provider class. This single instance backs every feedback function
# defined in this cell.
openai = OpenAI()

# Select context to be used in feedback. We select the return values of the
# virtual `get_context` call in the virtual `retriever` component. Names are
# arbitrary except for `rets`. If there are multiple calls to this method
# recorded, the first one is used by default though a warning will be issued.
context = context_method.rets[:]
# Same as context = context_method[0].rets[:]

# Alternatively, all of the contexts can be retrieved for use in feedback.
context_all_calls = context_method[:].rets[:]

# Define a groundedness feedback function. Groundedness is taken from the
# provider itself: `trulens.feedback.v2.feedback.Groundedness` is not the legacy
# wrapper that accepted `groundedness_provider=` and exposed
# `groundedness_measure_with_cot_reasons` / `grounded_statements_aggregator`.
# No separate aggregation step is configured, since the context chunks are
# collected into one list and passed in a single call.
f_groundedness = (
    Feedback(openai.groundedness_measure_with_cot_reasons)
    .on(context.collect())  # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between question and each context chunk.
f_context_relevance = Feedback(openai.context_relevance).on_input().on(context)

# Question/statement relevance between question and each context chunk and for
# all calls of the context retriever. Note, a different name has to be given as
# otherwise the default names will clash with the other context_relevance above.
f_context_relevance_all_calls = (
    Feedback(openai.context_relevance, name="context_relevance_all_calls")
    .on_input()
    .on(context_all_calls)
)

In [None]:
# Build the virtual recorder and attach the feedback functions defined earlier.
# Most of the fields accepted for other, non-virtual apps can be specified here
# as well.

from trulens.apps.virtual import TruVirtual

virtual_recorder = TruVirtual(
    app=virtual_app,
    app_name="a virtual app",
    feedbacks=[
        f_groundedness,
        f_qa_relevance,
        f_context_relevance,
        f_context_relevance_all_calls,
    ],
)

# Run to read more about TruVirtual:
# help(TruVirtual)

In [None]:
# Add the records. Calling `add_record` on `TruVirtual` adds the given record
# to the database and also runs the pre-specified feedback functions on it. The
# means of running the feedback functions is the same as in non-virtual apps,
# i.e. specified using `feedback_mode`. If `feedback_mode` is
# `FeedbackMode.WITH_APP`, the calls to `add_record` will block until all
# feedback are evaluated. You can also pass a feedback mode to `add_record` to
# use that mode for that particular record only.


for rec in data:
    virtual_recorder.add_record(rec)

    # Can wait for feedback on `add_record`
    # (this needs `FeedbackMode`, e.g. `from trulens.core import FeedbackMode`):
    # virtual_recorder.add_record(rec, feedback_mode=FeedbackMode.WITH_APP)

# Run to read more about add_record (left commented out, like the other help
# calls in this notebook, so the cell output is not flooded on every run):
# help(virtual_recorder.add_record)

In [None]:
# Retrieve feedback results. Besides browsing the dashboard, the results can be
# read back from each record once it has been passed to `add_record`.

for rec in data:
    print(rec.main_input, rec.main_output, sep=" --> ")

    # `wait_for_feedback_results()` yields a mapping whose keys expose `.name`
    # and whose values expose `.result`; print one line per entry.
    results = rec.wait_for_feedback_results()
    for feedback_def, outcome in results.items():
        print("\t", feedback_def.name, outcome.result)

    print()

# Run to read more about Feedback and FeedbackResult:
# help(Feedback)
# help(FeedbackResult)