Added Promptfoo (#636)
* added class EvalPromptfoo

* Add eval_model to EvalPromptfoo

---------

Co-authored-by: Dhruv Chawla <dhruv@uptrain.ai>
shrjain1312 and Dominastorm committed Mar 15, 2024
1 parent 1b713a8 commit 44fcd88
Showing 6 changed files with 335 additions and 233 deletions.
313 changes: 81 additions & 232 deletions examples/integrations/observation_tools/promptfoo/promptfoo.ipynb

Large diffs are not rendered by default.
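
Since the notebook diff is not rendered, here is a minimal usage sketch of the new integration, based on the EvalPromptfoo.evaluate signature added in this commit. The eval names, weights, prompt, and provider id below are illustrative assumptions, not taken from the notebook:

from uptrain import Evals
from uptrain.integrations.promptfoo import EvalPromptfoo

# Illustrative inputs; OPENAI_API_KEY must be set for the UpTrain checks to run.
data = [
    {
        "question": "What is the capital of France?",
        "context": "Paris is the capital and largest city of France.",
    }
]

EvalPromptfoo().evaluate(
    evals_list=[Evals.CONTEXT_RELEVANCE, Evals.FACTUAL_ACCURACY],  # assumed checks
    evals_weight=[0.5, 0.5],
    input_data=data,
    threshold=0.5,
    prompts="Answer using the context.\nQuestion: {{question}}\nContext: {{context}}",
    providers=["openai:gpt-3.5-turbo"],  # assumed promptfoo provider id
)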

1 change: 1 addition & 0 deletions pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
"openai>=1.6.1",
"fsspec",
"litellm",
"pyyaml"
]

[project.urls]
2 changes: 2 additions & 0 deletions uptrain/__init__.pyi
@@ -8,6 +8,7 @@ __all__ = [
"ResponseMatching",
"Settings",
"EvalLlamaIndex",
"EvalPromptfoo",
"CustomPromptEval",
"RcaTemplate",
"JailbreakDetection",
@@ -27,3 +28,4 @@ from .framework.evals import (
from .framework.evalllm import EvalLLM
from .framework.rca_templates import RcaTemplate
from .integrations.llama_index import EvalLlamaIndex
from .integrations.promptfoo import EvalPromptfoo
3 changes: 2 additions & 1 deletion uptrain/integrations/__init__.pyi
@@ -1,3 +1,4 @@
__all__ = ["EvalLlamaIndex"]
__all__ = ["EvalLlamaIndex", "EvalPromptfoo"]

from .llama_index import EvalLlamaIndex
from .promptfoo import EvalPromptfoo
115 changes: 115 additions & 0 deletions uptrain/integrations/promptfoo.py
@@ -0,0 +1,115 @@
from __future__ import annotations
from loguru import logger
import polars as pl
import pandas as pd
import typing as t
import subprocess
import time
from uptrain.integrations import promptfoo_utils as pr_u

timestr = time.strftime("%m_%d_%Y-%H_%M_%S")  # used for the default results filename (no ':' so it is valid on Windows)

__all__ = ["EvalPromptfoo"]


class EvalPromptfoo:
def __init__(self) -> None:
return None

    def evaluate(
        self,
        evals_list: list,
        evals_weight: list,
        input_data: t.Union[list[dict], pl.DataFrame, pd.DataFrame],
        threshold: float,
        prompts: str,
        providers: list,
        redirect_webview: bool = False,
        output_file: t.Optional[str] = None,
        port: t.Optional[int] = None,
        eval_model: str = "gpt-3.5-turbo-1106",
    ):
        # Resolve defaults at call time rather than at class-definition time, so the
        # chosen port is still free when the dashboard is launched.
        if output_file is None:
            output_file = "results_" + timestr + ".csv"
        if port is None:
            port = pr_u.generate_open_port()

        evals_compiled = pr_u.compile_evals(evals_list, evals_weight)

if isinstance(input_data, pl.DataFrame):
input_data = input_data.to_dicts()
elif isinstance(input_data, pd.DataFrame):
input_data = input_data.to_dict(orient="records")

yaml_data = {
"prompts": prompts,
"providers": providers,
"tests": [
{
"description": "Data " + str(i + 1),
"vars": {
"question": input_data[i]["question"],
"context": input_data[i]["context"],
},
"threshold": threshold,
"assert": [
{
"type": "python",
"value": pr_u.format_uptrain_template(
input_data[i], evals_compiled[j], threshold, eval_model
),
}
for j in range(len(evals_compiled))
],
}
for i in range(len(input_data))
],
}

pr_u.generate_promptfoo_yaml_file(yaml_data)
try:
subprocess.run(
["npx", "--yes", "promptfoo@latest", "eval", "-o", output_file]
)
logger.success("Evaluations successfully generated")
logger.success("Saved results to file: " + output_file)
if redirect_webview:
try:
if isinstance(port, int):
subprocess.run(
["npx", "promptfoo@latest", "view", "-y", "-p", str(port)]
)
logger.success(
"Successfully rerouted to promptfoo dashboards @ http://localhost:"
+ str(port)
+ "/"
)
else:
subprocess.run(["npx", "promptfoo@latest", "view", "-y"])
logger.success(
"Successfully rerouted to promptfoo dashboards @ http://localhost:"
+ str(pr_u.DEFAULT_PORT_PROMPTFOO)
)
except Exception as e:
logger.error(f"Failed to generate a view: {e}")
            else:
                try:
                    if isinstance(port, int):
                        subprocess.run(
                            ["npx", "promptfoo@latest", "view", "-p", str(port)]
                        )
                        logger.success(
                            "Open http://localhost:"
                            + str(port)
                            + "/"
                            + " in a web browser to view the dashboards"
                        )
                    else:
                        subprocess.run(["npx", "promptfoo@latest", "view"])
                        logger.success(
                            "Open http://localhost:"
                            + str(pr_u.DEFAULT_PORT_PROMPTFOO)
                            + "/"
                            + " in a web browser to view the dashboards"
                        )
                except Exception as e:
                    logger.error(f"Failed to generate a view: {e}")
except Exception as e:
logger.error(f"Evaluation failed with error: {e}")
raise e
return None
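
For reference, a sketch of the structure that evaluate() assembles and passes to pr_u.generate_promptfoo_yaml_file for one input row and one compiled eval; the concrete values here are illustrative assumptions:

yaml_data = {
    "prompts": "Answer using the context.\nQuestion: {{question}}\nContext: {{context}}",
    "providers": ["openai:gpt-3.5-turbo"],
    "tests": [
        {
            "description": "Data 1",
            "vars": {"question": "<row question>", "context": "<row context>"},
            "threshold": 0.5,
            "assert": [
                {
                    "type": "python",
                    # the source of template_uptrain_eval with the row's values substituted in
                    "value": "<python assertion built by pr_u.format_uptrain_template>",
                }
            ],
        }
    ],
}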
134 changes: 134 additions & 0 deletions uptrain/integrations/promptfoo_utils.py
@@ -0,0 +1,134 @@
from loguru import logger
import inspect
import yaml
from uptrain import Evals
import socket

DEFAULT_PORT_PROMPTFOO = 15500
"""
evals:  [Evals.Metric1, Evals.Metric2]
weight: [Weight1, Weight2]
The weights are used to compute a weighted average of the metrics being run.
func(compile_evals): builds a list of dicts, one per eval, which looks like:
[
    {
        'eval_type': Evals.Metric1,
        'eval_weight': Weight1,
        'score_type': score_metric1,
        'explanation_type': explanation_metric1
    }
]
"""


def compile_evals(evals, weight):
    if len(evals) != len(weight):
        logger.error("Length of evals != Length of weight")
        raise ValueError("evals and weight must have the same length")
    if not all(isinstance(_, Evals) for _ in evals):
        logger.error("Please check the list of evals: every entry should be an uptrain Evals member")
res_dict = [
{
"eval_type": str(evals[i]),
"eval_weight": weight[i],
"score_type": "score_" + evals[i].value,
"explanation_type": "explanation_" + evals[i].value,
}
for i in range(len(evals))
]
return res_dict
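
# Illustrative example, assuming Evals.CONTEXT_RELEVANCE is one of the available checks:
#   compile_evals([Evals.CONTEXT_RELEVANCE], [1.0]) would return roughly
#   [{"eval_type": "Evals.CONTEXT_RELEVANCE",
#     "eval_weight": 1.0,
#     "score_type": "score_context_relevance",
#     "explanation_type": "explanation_context_relevance"}]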


"""
func(template_uptrain_eval):
This function is never called directly; it is a template.
Its source is read via inspect and, after variable substitution, embedded in the
promptfoo yaml file as a python assertion for each test.
"""


def template_uptrain_eval():
from uptrain import EvalLLM, Settings, Evals
import os

data = [{"question": input_question, "context": input_context, "response": output}]
settings = Settings(openai_api_key=os.environ["OPENAI_API_KEY"], model=eval_model)
eval_llm = EvalLLM(settings=settings)
results = eval_llm.evaluate(data=data, checks=eval_type)
if results[0][score_var] > threshold_var:
return {
"pass": True,
"score": results[0][score_var],
"reason": results[0][explanation_var],
}
else:
return {
"pass": False,
"score": results[0][score_var],
"reason": results[0][explanation_var],
}


"""
func(format_uptrain_template): substitutes the per-row data and compiled-eval fields into the template source
"""


def format_uptrain_template(data, evals_compiled, threshold, eval_model):
    uptrain_template_lines = inspect.getsourcelines(template_uptrain_eval)[0]
uptrain_template = "".join(uptrain_template_lines[1:])

uptrain_template = uptrain_template.replace(
"input_question", "'{}'".format(data["question"])
)
uptrain_template = uptrain_template.replace(
"input_context", "'{}'".format(data["context"])
)
uptrain_template = uptrain_template.replace(
"eval_type", "[{}]".format(evals_compiled["eval_type"])
)
uptrain_template = uptrain_template.replace(
"score_var", "'{}'".format(evals_compiled["score_type"])
)
uptrain_template = uptrain_template.replace(
"explanation_var", "'{}'".format(evals_compiled["explanation_type"])
)
uptrain_template = uptrain_template.replace("threshold_var", "{}".format(threshold))
uptrain_template = uptrain_template.replace("eval_model", "'{}'".format(eval_model))
return uptrain_template
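
# Illustrative result (values assumed): the returned string is the body of
# template_uptrain_eval with, e.g., input_question -> 'What is ...?',
# eval_type -> [Evals.CONTEXT_RELEVANCE], score_var -> 'score_context_relevance',
# threshold_var -> 0.5 and eval_model -> 'gpt-3.5-turbo-1106'; promptfoo runs that
# snippet as the "python" assertion for the corresponding test case.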


"""
Generate the yaml config file (promptfooconfig.yaml) for promptfoo
"""


def generate_promptfoo_yaml_file(py_obj):
    try:
        with open("promptfooconfig.yaml", "w") as f:
            yaml.dump(py_obj, f, sort_keys=False)
        logger.success("Successfully generated file: promptfooconfig.yaml")
    except Exception as e:
        logger.error(f"Unable to generate file promptfooconfig.yaml: {e}")


"""
Find an open port for the promptfoo view server (defaults to DEFAULT_PORT_PROMPTFOO)
"""


def generate_open_port():
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # connect_ex returns 0 if something is already listening on the default port
    port_res = sock.connect_ex(("localhost", DEFAULT_PORT_PROMPTFOO))
    sock.close()
    if port_res == 0:
        logger.info("Default port " + str(DEFAULT_PORT_PROMPTFOO) + " already in use")
        sock = socket.socket()
        sock.bind(("", 0))  # port 0 lets the OS pick a free port
        open_port = sock.getsockname()[1]
        sock.close()
        logger.info("Using port: " + str(open_port))
    else:
        open_port = DEFAULT_PORT_PROMPTFOO
    return open_port
