# Dev Notebook

This notebook loads the version of trulens_eval from the enclosing repo folder. You can use it to debug or develop trulens_eval features.

In [None]:
# pip uninstall -y trulens_eval
# pip install git+https://github.com/truera/trulens@piotrm/azure_bugfixes#subdirectory=trulens_eval

# trulens_eval notebook dev

%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# Locate the repo checkout: the closest directory, starting at the current
# working directory and walking upward, that contains a `trulens_eval` folder.
# Iterating over the finite list of parents (instead of looping on
# `base = base.parent`) guarantees termination: at the filesystem root
# `parent` returns the root itself, so an unbounded loop would never exit
# when the notebook is started outside the repo.
start = Path.cwd()
base = next(
    (
        candidate for candidate in (start, *start.parents)
        if (candidate / "trulens_eval").exists()
    ),
    None,
)

if base is None:
    # Not inside a checkout: leave sys.path alone so that a pip-installed
    # trulens_eval (see the commented-out install lines above) is used.
    print(
        f"WARNING: no `trulens_eval` folder found in {start} or any parent; "
        "sys.path left unchanged."
    )
else:
    print(base)

    # If running from github repo, can use this:
    # (guarded so that re-running the cell does not add duplicate entries)
    if str(base) not in sys.path:
        sys.path.append(str(base))

# Optional verbose logging. The setup below is disabled by being wrapped in a
# bare string literal (not by `#` comments): remove the surrounding triple
# quotes to send DEBUG-level log records from the root logger to stdout.
"""
import logging
root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)
"""

from trulens_eval.keys import check_keys

# Pass the names of the API keys used by later cells to `check_keys`.
# NOTE(review): presumably this verifies the keys are available in the
# environment and fails otherwise; confirm against trulens_eval.keys.
check_keys(
    "OPENAI_API_KEY",
    "HUGGINGFACE_API_KEY"
)

# from trulens_eval import Tru
# tru = Tru()
# tru.reset_database()
# tru.run_dashboard(_dev=base, force=True)

In [None]:
# Smoke test for the v2 feedback templates: build a `Template` from a literal
# string. `Insensitivity` is imported but not used in this cell.
from trulens_eval.feedback.v2.feedback import Template, Insensitivity

t = Template.from_template("hello there")

In [None]:
from trulens_eval.tru_llama import TruLlama

check_keys("OPENAI_API_KEY", "HUGGINGFACE_API_KEY")
import os
import urllib.request

from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex

# Sample document used to build the index. It is downloaded once and cached
# under data/ so that re-running the cell does not hit the network again.
ESSAY_URL = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt'
ESSAY_PATH = "data/paul_graham_essay.txt"

if not os.path.exists(ESSAY_PATH):
    # Use the standard library instead of shelling out to `wget`: it works on
    # systems without wget and raises on failure instead of being ignored.
    os.makedirs(os.path.dirname(ESSAY_PATH), exist_ok=True)
    try:
        urllib.request.urlretrieve(ESSAY_URL, ESSAY_PATH)
    except Exception:
        # Do not leave a truncated file behind: the existence check above
        # would otherwise treat it as a complete download on the next run.
        if os.path.exists(ESSAY_PATH):
            os.remove(ESSAY_PATH)
        raise

# Load everything under data/ and build an in-memory vector index over it.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Query engine shared by the recorder cells further down.
query_engine = index.as_query_engine()

# This test does not run correctly if async is used, i.e. not using
# `sync` to convert to sync.

In [None]:
from trulens_eval.feedback.provider.hugs import Dummy
from trulens_eval import Select
from trulens_eval.feedback.feedback import Feedback

# Feedback backed by the Dummy provider's `language_match` implementation.
dummy_provider = Dummy()
language_match_feedback = Feedback(dummy_provider.language_match)

# Point the feedback at element 42 of the retriever's `retrieve` return value.
retrieved_item_selector = Select.RecordCalls._retriever.retrieve.rets[42]
f = language_match_feedback.on(retrieved_item_selector)

# Recorder around the query engine that evaluates the feedback above.
tru_query_engine_recorder = TruLlama(query_engine, feedbacks=[f])

In [None]:
import inspect
from typing import Tuple
from trulens_eval.utils.text import make_retab

# Sample feedback implementation (a bound method of a fresh Dummy provider);
# it is the argument of the commented-out `invert_feedback` example at the end
# of this cell.
fimp = Dummy().language_match

# Prompt skeleton for `invert_feedback`. Placeholders: `imp_doc` (function
# description), `target` (desired output) and `fixed_inputs` (one
# `name=value` line per argument that is held fixed).
invert_template = """
You are a fuzzing tool. Your job is to provide inputs to a function that will
achieve a desired output. You are given a description of the function. Some
inputs may be fixed, others are to be determined by you.

BEGIN FUNCTION DESCRIPTION
{imp_doc}
END FUNCTION DESCRIPTION

BEGIN TARGET OUTPUT
{target}
END TARGET OUTPUT

BEGIN FIXED INPUTS
{fixed_inputs}
END FIXED INPUTS
"""

def invert_feedback(imp, target, **kwargs):
    """Build the "fuzzing" prompt for feedback function `imp`.

    The prompt asks for inputs that make `imp` produce `target`.

    Args:
        imp: The feedback function; its `__doc__` is used verbatim as the
            function description.
        target: The output value the generated inputs should achieve.
        **kwargs: Arguments of `imp` that are pinned to the given values;
            each is rendered as a `name=value` line.

    Returns:
        The filled-in `invert_template` string.
    """

    # One `name=value` line per pinned argument, in the order given.
    pinned_lines = []
    for arg_name, arg_value in kwargs.items():
        pinned_lines.append(f"{arg_name}={arg_value}")

    return invert_template.format(
        imp_doc=imp.__doc__,
        target=target,
        fixed_inputs="\n".join(pinned_lines),
    )

# Disabled section of `doc_template`: it would embed the existing docstring
# (placeholder `imp_doc`) in the prompt. It is kept here, outside the string,
# for reference.
#BEGIN FUNCTION DOCSTRING
#{imp_doc}
#END FUNCTION DOCSTRING

# Prompt skeleton for `doc_feedback`. Placeholders: `sig` (name + signature),
# `src` (source code), `args_templates` (one `arg_template` line per
# argument) and `rets_templates` (one `ret_template` block per return type).
doc_template = """
You are a python method summarization tool. Your job is to summarize the
purpose, implementation, arguments, and returns of a given method based on its
signature and source code.

BEGIN FUNCTION SIGNATURE
{sig}
END FUNCTION SIGNATURE

BEGIN FUNCTION SOURCE
{src}
END FUNCTION SOURCE

Summarize the purpose of the method without saying how it is implemented or what
tools are used to implement it.
PURPOSE: <overall method purpose>

Summarize how the method is implemented:
IMPLEMENTATION: <how the method achieves its purpose>

Summarize each of these arguments in this form, one per line:
{args_templates}

Summarize the function's return value. List its type, overall interpretation,
and an interpretation extremal values it could achieve.
{rets_templates}
"""

# Line the model is asked to complete for a single argument; placeholders are
# the argument's `name` and the name of its annotated `type`.
arg_template = """ARGUMENT({name}: {type}): <argument_description>"""

# Block the model is asked to complete for a return type: a description of
# the return plus interpretations of two example return values.
ret_template = """
RETURN({type}): <return1_description>
RETURNVALUE(<return1_value1>): <interpretation for this return value>
RETURNVALUE(<return1_value2>): <interpretation for this return value>
"""

def _annotation_name(annotation):
    """Return a readable name for a signature annotation.

    Missing annotations are reported as "Any", string (postponed)
    annotations are returned as-is, and objects without a `__name__` (some
    `typing` constructs) fall back to their `str()` form.
    """
    if annotation is inspect.Signature.empty:
        return "Any"
    if isinstance(annotation, str):
        return annotation
    name = getattr(annotation, "__name__", None)
    return name if name is not None else str(annotation)


def doc_feedback(imp):
    """Build a prompt asking an LLM to document feedback function `imp`.

    The prompt contains the signature and source code of `imp` followed by
    fill-in templates for its purpose, implementation, each argument (except
    `self`) and its return value.

    Args:
        imp: The function or bound method to describe. Its source must be
            retrievable with `inspect.getsource`.

    Returns:
        The filled-in `doc_template` string.

    Raises:
        OSError, TypeError: Propagated from `inspect.getsource` when the
            source of `imp` is not available.
    """

    sig = inspect.signature(imp)

    rt = make_retab("  ")

    # One ARGUMENT(...) line per parameter; `self` is not worth describing.
    args_templates = "\n".join(
        arg_template.format(
            name=param.name, type=_annotation_name(param.annotation)
        )
        for param in sig.parameters.values()
        if param.name != "self"
    )

    ret_annot = sig.return_annotation

    # For parameterized annotations (e.g. Tuple[float, dict]) only the first
    # type argument is described; anything else is described as a whole.
    type_args = getattr(ret_annot, "__args__", None)
    ret_types = [type_args[0]] if type_args else [ret_annot]

    rets_templates = "\n".join(
        ret_template.format(type=_annotation_name(ret_type))
        for ret_type in ret_types
    )

    return doc_template.format(
        # Not referenced by the current template (its docstring section is
        # disabled); still supplied so that section can be re-enabled.
        imp_doc=rt(imp.__doc__ or ""),
        sig=rt(imp.__name__ + str(sig)),
        # Source of the function being described, i.e. the `imp` argument
        # rather than whatever a module-level variable happens to hold.
        src=rt(inspect.getsource(imp)),
        args_templates=args_templates,
        rets_templates=rets_templates
    )


# Example use of the inversion prompt (disabled):
# invert_feedback(fimp, 1.0, text1="How are you?")

# Build the documentation prompt for the Dummy provider's `toxic` feedback.
prompt = doc_feedback(Dummy().toxic)

In [None]:
# `print` (rather than a bare expression) so the multi-line prompt is shown
# with real line breaks instead of as an escaped string repr.
print(prompt)

In [None]:
import openai
# Client used by the summarization loop in the next cell.
# NOTE(review): constructed without arguments, so credentials presumably come
# from the environment (OPENAI_API_KEY is checked in earlier cells) — confirm.
c = openai.OpenAI()

In [None]:
# res = c.completions.create#(model="gpt-3.5-turbo", prompt=prompt)
# help(c.chat.completions.create)

d = Dummy()

# Feedback implementations of the Dummy provider to summarize.
feedback_impls = [
    d.language_match,
    d.positive_sentiment,
    d.toxic,
    d.pii_detection,
    d.hallucination_evaluator,
]

# For each implementation: print its name, ask the chat model to fill in the
# documentation prompt, and print the model's answer followed by a blank line.
for fimp in feedback_impls:
    print(fimp.__name__)
    prompt = doc_feedback(fimp)
    messages = [{"role": "system", "content": prompt}]
    res = c.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.0,
    )
    print(res.choices[0].message.content)
    print()

In [None]:
from trulens_eval.utils.asynchro import sync

# Call the recorder's async entry point `awith_record` through `sync`, passing
# it the engine's async `aquery` method and the question; the result is
# unpacked into the response and its record.
# NOTE(review): `sync` presumably runs the coroutine to completion and returns
# its result — confirm against trulens_eval.utils.asynchro.
llm_response_async, record_async = sync(tru_query_engine_recorder.awith_record,
    query_engine.aquery, "What did the author do growing up?"
)
# Show the record as the cell output.
record_async

In [None]:
# Fresh recorder without feedbacks; this replaces the one created earlier.
tru_query_engine_recorder = TruLlama(query_engine)
#with tru_query_engine_recorder as recording:
# Same call as the `sync` variant, but awaited directly. Top-level `await`
# only works where the notebook kernel supports it (IPython autoawait).
llm_response_async, record = await tru_query_engine_recorder.awith_record(query_engine.aquery, "What did the author do growing up?")

#record_async = recording.get()

In [None]:
tru_query_engine_recorder = TruLlama(query_engine)
with tru_query_engine_recorder as recording:
    llm_response_async = query_engine.aquery("What did the author do growing up?")

#record_async = recording.get()

In [None]:
# Inspect what the most recent `with ... as recording` block captured.
recording.records

In [None]:
# Sanity check that the engine is a llama_index query engine.
# The rest of this notebook imports from the `llama_index.core` package
# layout, where the base class is expected under `core.base`; the path
# originally used here is kept as a fallback for other layouts.
try:
    from llama_index.core.base.base_query_engine import BaseQueryEngine
except ImportError:
    from llama_index.core.base_query_engine import BaseQueryEngine
isinstance(query_engine, BaseQueryEngine)

In [None]:
# Synchronous baseline: rebuild the query engine from the index, wrap it in a
# new recorder and run one blocking query inside the recording context.
query_engine = index.as_query_engine()
tru_query_engine_recorder = TruLlama(query_engine)
with tru_query_engine_recorder as recording:
    llm_response_sync = query_engine.query(
        "What did the author do growing up?"
    )
# Fetch the record from the recording.
# NOTE(review): presumably `get()` expects a single captured record — confirm.
record_sync = recording.get()