In [1]:
# pip uninstall -y trulens_eval
# pip install git+https://github.com/truera/trulens@piotrm/azure_bugfixes#subdirectory=trulens_eval

# trulens_eval notebook dev

%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

base = Path().cwd()
while not (base / "trulens_eval").exists():
    base = base.parent

print(base)

# If running from github repo, can use this:
sys.path.append(str(base))

# Uncomment for more debugging printouts.
"""
import logging
root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)
"""

from trulens_eval.keys import check_keys

check_keys(
    "OPENAI_API_KEY",
    "HUGGINGFACE_API_KEY"
)

# from trulens_eval import Tru
# tru = Tru()
# tru.reset_database()
# tru.run_dashboard(_dev=base, force=True)

/Volumes/dev_new/trulens/trulens_eval
✅ Key OPENAI_API_KEY set from environment (same value found in .env file at /Volumes/dev_new/.env).
✅ Key HUGGINGFACE_API_KEY set from environment (same value found in .env file at /Volumes/dev_new/.env).


In [2]:
from trulens_eval.feedback.v2.feedback import Template, Insensitivity

t = Template.from_template("hello there")

In [3]:
from trulens_eval.feedback.provider.hugs import Dummy
from trulens_eval import Select
from trulens_eval.feedback.feedback import Feedback

In [4]:
import inspect
from typing import Tuple
from trulens_eval.utils.text import make_retab

fimp = Dummy().language_match

invert_template = """
You are a fuzzing tool. Your job is to provide inputs to a function that will
achieve a desired output. You are given a description of the function. Some
inputs may be fixed, others are to be determined by you.

BEGIN FUNCTION DESCRIPTION
{imp_doc}
END FUNCTION DESCRIPTION

BEGIN TARGET OUTPUT
{target}
END TARGET OUTPUT

BEGIN FIXED INPUTS
{fixed_inputs}
END FIXED INPUTS
"""

def invert_feedback(imp, target, **kwargs):
    """Try to fill in input values to feedback function `imp` to achieve output
    value `target`. Any `kwargs` provided fix `imp` arguments to the given
    values.
    """

    doc = imp.__doc__

    filled_template = invert_template.format(
        imp_doc=doc,
        target=target,
        fixed_inputs="\n".join(
            f"{k}={v}" for k, v in kwargs.items()
        )
    )

    return filled_template

#BEGIN FUNCTION DOCSTRING
#{imp_doc}
#END FUNCTION DOCSTRING

doc_template = """
You are a python method summarization tool. Your job is to summarize the
purpose, implementation, arguments, and returns of a given method based on its
signature and source code.

BEGIN FUNCTION SIGNATURE
{sig}
END FUNCTION SIGNATURE

BEGIN FUNCTION SOURCE
{src}
END FUNCTION SOURCE

Summarize the purpose of the method. Please be as concise as possible and avoid
mentioning how the method is implemented or what tools it uses to achieve its
purpose. Only summarize the purpose.

PURPOSE: <overall method purpose>

Summarize how the method is implemented:
IMPLEMENTATION: <how the method achieves its purpose>

Summarize each of these arguments in this form, one per line:
{args_templates}

Summarize the function's return value. List its type, overall interpretation,
and an interpretation extremal values it could achieve.
{rets_templates}
"""

arg_template = """ARGUMENT({name}: {type}): <argument_description>"""

ret_template = """
RETURN({type}): <return1_description>
RETURNVALUE(<return1_value1>): <interpretation for this return value>
RETURNVALUE(<return1_value2>): <interpretation for this return value>
"""

def doc_feedback(imp):
    """Try to fill in input values to feedback function `imp` to achieve output
    value `target`. Any `kwargs` provided fix `imp` arguments to the given
    values.
    """

    doc = imp.__doc__
    sig = inspect.signature(imp)

    rt = make_retab("  ")

    arg_templates = "\n".join(
        arg_template.format(name=arg.name, type=arg.annotation.__name__)
        for arg in sig.parameters.values() if arg.name != "self"
    )

    ret_annot = sig.return_annotation

    if hasattr(ret_annot, "__args__"):
        ret_types = [ret_annot.__args__[0]]
    else:
        ret_types = [ret_annot]

    rets_templates = "\n".join(
        ret_template.format(type=ret_type.__name__)
        for ret_type in ret_types
    )
    
    filled_template = doc_template.format(
        imp_doc=rt(doc),
        sig=rt(imp.__name__ + str(sig)),
        src=rt(inspect.getsource(fimp)),
        args_templates=arg_templates,
        rets_templates=rets_templates
    )

    return filled_template


# invert_feedback(fimp, 1.0, text1="How are you?")
prompt = doc_feedback(Dummy().toxic)

In [5]:
import dataclasses
from typing import Dict, Any, Type
import re
from textwrap import wrap

@dataclasses.dataclass
class LLMDoc():
    purpose: str

    details: str

    args: Dict[Tuple[str, Type], str]

    ret: str
    ret_type: Type
    rets: Dict[Any, str]

    def as_docstring(self, initial_indent=0, tabsize=4):
        tab1 = make_retab("  ")

        space = 80 - initial_indent

        doc_purpose = "\n".join(wrap(
            "\"\"\"" + self.purpose,
            width=space,
            initial_indent=" " * initial_indent,
            subsequent_indent=" " * initial_indent
        ))

        doc_details = "\n".join(wrap(
            self.details,
            width=space,
            initial_indent=" " * initial_indent,
            subsequent_indent=" " * initial_indent
        ))

        doc_args = " " * initial_indent + "Args:\n"
        for (name, type_), desc in self.args.items():
            doc_args += "\n".join(wrap(
                f"{name}: {desc}",
                width=space - initial_indent - tabsize,
                initial_indent=" " * (initial_indent + tabsize),
                subsequent_indent=" " * (initial_indent + 2*tabsize)
            )) + "\n\n"

        doc_rets = " " * initial_indent + "Returns:\n"
        doc_rets += "\n".join(wrap(
            f"{self.ret_type}: {self.ret}",
            width=space - tabsize,
            initial_indent=" " * (initial_indent + tabsize),
            subsequent_indent=" " * (initial_indent + 2*tabsize)
        )) + "\n\n"

        for ret, desc in self.rets.items():
            doc_rets += "\n".join(wrap(
                f"{ret}: {desc}",
                width=space - 2*tabsize,
                initial_indent=" " * (initial_indent + 2*tabsize),
                subsequent_indent=" " * (initial_indent + 3*tabsize)
            )) + "\n\n"

        
        doc = f"""{doc_purpose}

{doc_details}

{doc_args}

{doc_rets}
{" " * initial_indent}\"\"\"
"""
        return doc

re_purpose = re.compile(r"PURPOSE: (.*)")
re_arg = re.compile(r"ARGUMENT\((.*): (.*)\): (.*)")
re_returnvalue = re.compile(r"RETURNVALUE\((.*)\): (.*)")
re_return = re.compile(r"RETURN\((.*)\): (.*)")
re_details = re.compile(r"DETAILS: (.*)")

def parse_result(res: str) -> LLMDoc:
    lines = res.split("\n")

    doc_args = {}
    doc_rets = {}
    doc = {'args': doc_args, 'rets': doc_rets}

    for line in lines:
        if match := re_purpose.fullmatch(line):
            doc['purpose'] = match.group(1)

        elif match := re_details.fullmatch(line):
            doc['details'] = match.group(1)

        elif match := re_returnvalue.fullmatch(line):
            arg, desc = match.groups()
            doc_rets[arg] = desc

        elif match := re_return.fullmatch(line):
            doc['ret'] = match.group(2)
            doc['ret_type'] = match.group(1)

        elif match := re_arg.fullmatch(line):
            name, type_, desc = match.groups()
            doc_args[(name, type_)] = desc

    return LLMDoc(**doc)

from pprint import pprint

doc = parse_result("""PURPOSE: Calculate the likelihood that two texts are in the same natural language.

DETAILS: The method uses a language detection API to get language scores for the input texts, calculates the difference in scores, and returns a value based on the difference.

ARGUMENT(text1: str): The first text to evaluate.
ARGUMENT(text2: str): The second text to evaluate.

RETURN(float): A value between 0.0 and 1.0. The value 0.0 indicates that the input texts were of different languages, and 1.0 indicates they are in the same language.
RETURNVALUE(0.0): Input texts are of different languages.
RETURNVALUE(1.0): Input texts are in the same language.""")

print(doc.as_docstring(initial_indent=4))

    """Calculate the likelihood that two texts are in the same natural
    language.

    The method uses a language detection API to get language scores for the
    input texts, calculates the difference in scores, and returns a value
    based on the difference.

    Args:
        text1: The first text to evaluate.

        text2: The second text to evaluate.



    Returns:
        float: A value between 0.0 and 1.0. The value 0.0 indicates that
            the input texts were of different languages, and 1.0
            indicates they are in the same language.

            0.0: Input texts are of different languages.

            1.0: Input texts are in the same language.


    """



In [None]:
# res = c.completions.create#(model="gpt-3.5-turbo", prompt=prompt)
# help(c.chat.completions.create)

import openai
c = openai.OpenAI()

d = Dummy()

for fimp in [d.language_match]:#, d.positive_sentiment, d.toxic, d.pii_detection, d.hallucination_evaluator]:
    print(fimp.__name__)
    prompt = doc_feedback(fimp)
    res = c.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "system", "content": prompt}], temperature=0.0)
    print(res.choices[0].message.content)
    print()