Commit 44fcd88

* Added class EvalPromptfoo
* Added eval_model to EvalPromptfoo

Co-authored-by: Dhruv Chawla <dhruv@uptrain.ai>

1 parent: 1b713a8
Showing 6 changed files with 335 additions and 233 deletions.
examples/integrations/observation_tools/promptfoo/promptfoo.ipynb
313 changes: 81 additions & 232 deletions (large diff not rendered by default)
pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
     "openai>=1.6.1",
     "fsspec",
     "litellm",
+    "pyyaml"
 ]

 [project.urls]
uptrain/integrations/__init__.py
@@ -1,3 +1,4 @@
-__all__ = ["EvalLlamaIndex"]
+__all__ = ["EvalLlamaIndex", "EvalPromptfoo"]

 from .llama_index import EvalLlamaIndex
+from .promptfoo import EvalPromptfoo
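With the export in place, the new evaluator can be imported straight from the integrations package; a minimal sketch (assuming an installed uptrain build that includes this commit):

    from uptrain.integrations import EvalPromptfoo

    evaluator = EvalPromptfoo()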
uptrain/integrations/promptfoo.py
@@ -0,0 +1,115 @@
from __future__ import annotations
from loguru import logger
import polars as pl
import pandas as pd
import typing as t
import subprocess
import time

from uptrain.integrations import promptfoo_utils as pr_u

timestr = time.strftime("%m_%d_%Y-%H:%M:%S")  # used for the default results filename

__all__ = ["EvalPromptfoo"]


class EvalPromptfoo:
    """Runs UpTrain evaluations through the promptfoo CLI."""

    def __init__(self) -> None:
        return None

    def evaluate(
        self,
        evals_list: list,
        evals_weight: list,
        input_data: t.Union[list[dict], pl.DataFrame, pd.DataFrame],
        threshold: float,
        prompts: str,
        providers: list,
        redirect_webview: bool = False,
        output_file: str = "results_" + timestr + ".csv",
        port: t.Optional[int] = None,
        eval_model: str = "gpt-3.5-turbo-1106",
    ):
        # Resolve the port at call time rather than in a default argument,
        # so the probe socket is not opened at import time.
        if port is None:
            port = pr_u.generate_open_port()

        evals_compiled = pr_u.compile_evals(evals_list, evals_weight)

        if isinstance(input_data, pl.DataFrame):
            input_data = input_data.to_dicts()
        elif isinstance(input_data, pd.DataFrame):
            input_data = input_data.to_dict(orient="records")

        # One promptfoo test per input row; each test carries one "python"
        # assert per configured eval.
        yaml_data = {
            "prompts": prompts,
            "providers": providers,
            "tests": [
                {
                    "description": "Data " + str(i + 1),
                    "vars": {
                        "question": input_data[i]["question"],
                        "context": input_data[i]["context"],
                    },
                    "threshold": threshold,
                    "assert": [
                        {
                            "type": "python",
                            "value": pr_u.format_uptrain_template(
                                input_data[i], evals_compiled[j], threshold, eval_model
                            ),
                        }
                        for j in range(len(evals_compiled))
                    ],
                }
                for i in range(len(input_data))
            ],
        }

        pr_u.generate_promptfoo_yaml_file(yaml_data)
        try:
            subprocess.run(
                ["npx", "--yes", "promptfoo@latest", "eval", "-o", output_file],
                check=True,
            )
            logger.success("Evaluations successfully generated")
            logger.success("Saved results to file: " + output_file)
            if redirect_webview:
                try:
                    subprocess.run(
                        ["npx", "promptfoo@latest", "view", "-y", "-p", str(port)]
                    )
                    logger.success(
                        "Successfully rerouted to promptfoo dashboards @ http://localhost:"
                        + str(port)
                        + "/"
                    )
                except Exception as e:
                    logger.error(f"Failed to generate a view: {e}")
            else:
                try:
                    subprocess.run(["npx", "promptfoo@latest", "view", "-p", str(port)])
                    logger.success(
                        "Open http://localhost:"
                        + str(port)
                        + "/ in a web browser to view the dashboards"
                    )
                except Exception as e:
                    logger.error(f"Failed to generate a view: {e}")
        except Exception as e:
            logger.error(f"Evaluation failed with error: {e}")
            raise e
        return None
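For reference, a minimal invocation sketch (not part of the commit; the sample row, evals, prompt, and provider are illustrative). It writes promptfooconfig.yaml, shells out to the promptfoo CLI, and saves a CSV of results:

    from uptrain import Evals
    from uptrain.integrations import EvalPromptfoo

    data = [
        {
            "question": "What is promptfoo?",
            "context": "promptfoo is a CLI and library for evaluating LLM outputs.",
        }
    ]

    EvalPromptfoo().evaluate(
        evals_list=[Evals.CONTEXT_RELEVANCE, Evals.RESPONSE_RELEVANCE],
        evals_weight=[0.5, 0.5],
        input_data=data,
        threshold=0.5,
        prompts="Answer using the context.\nQuestion: {{question}}\nContext: {{context}}",
        providers=["openai:gpt-3.5-turbo"],
    )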
uptrain/integrations/promptfoo_utils.py
@@ -0,0 +1,134 @@
from loguru import logger
import inspect
import yaml
from uptrain import Evals
import socket

DEFAULT_PORT_PROMPTFOO = 15500

"""
compile_evals(evals, weight):
    evals: [Evals.Metric1, Evals.Metric2]
    weight: [Weight1, Weight2]
The weights are used to take a weighted average of the metrics. The function
returns a list of dicts of the form:
    [
        {
            'eval_type': Evals.Metric1,
            'eval_weight': Weight1,
            'score_type': score_metric1,
            'explanation_type': explanation_metric1
        }
    ]
"""


def compile_evals(evals, weight):
    if len(evals) != len(weight):
        logger.error("Length of evals != Length of weight")
        raise ValueError("evals and weight must have the same length")
    if not all(isinstance(_, Evals) for _ in evals):
        logger.error("Please check the list of evals")
        raise TypeError("every entry in evals must be an uptrain Evals member")
    res_dict = [
        {
            "eval_type": str(evals[i]),
            "eval_weight": weight[i],
            "score_type": "score_" + evals[i].value,
            "explanation_type": "explanation_" + evals[i].value,
        }
        for i in range(len(evals))
    ]
    return res_dict
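# Hypothetical illustration (not part of the commit): with the uptrain enum,
#   compile_evals([Evals.CONTEXT_RELEVANCE], [1.0])
# would return a list like
#   [{'eval_type': 'Evals.CONTEXT_RELEVANCE', 'eval_weight': 1.0,
#     'score_type': 'score_context_relevance',
#     'explanation_type': 'explanation_context_relevance'}]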
"""
template_uptrain_eval():
    This is not called directly; it is a template. Its source is extracted
    with inspect and, after placeholder substitution, becomes the "python"
    assert that promptfoo executes. The free names (input_question,
    input_context, eval_type, score_var, explanation_var, threshold_var,
    eval_model) are placeholders filled in by format_uptrain_template;
    `output` is supplied by promptfoo at assertion time.
"""


def template_uptrain_eval():
    from uptrain import EvalLLM, Settings, Evals
    import os

    data = [{"question": input_question, "context": input_context, "response": output}]
    settings = Settings(openai_api_key=os.environ["OPENAI_API_KEY"], model=eval_model)
    eval_llm = EvalLLM(settings=settings)
    results = eval_llm.evaluate(data=data, checks=eval_type)
    if results[0][score_var] > threshold_var:
        return {
            "pass": True,
            "score": results[0][score_var],
            "reason": results[0][explanation_var],
        }
    else:
        return {
            "pass": False,
            "score": results[0][score_var],
            "reason": results[0][explanation_var],
        }


"""
format_uptrain_template(data, evals_compiled, threshold, eval_model):
    Substitutes the per-row values into the template above.
"""


def format_uptrain_template(data, evals_compiled, threshold, eval_model):
    # Drop the `def` line; the substituted body is what promptfoo executes.
    uptrain_template_lines = inspect.getsourcelines(template_uptrain_eval)[0]
    uptrain_template = "".join(uptrain_template_lines[1:])

    # repr() keeps embedded quotes in the data from breaking the generated code.
    uptrain_template = uptrain_template.replace(
        "input_question", repr(data["question"])
    )
    uptrain_template = uptrain_template.replace(
        "input_context", repr(data["context"])
    )
    uptrain_template = uptrain_template.replace(
        "eval_type", "[{}]".format(evals_compiled["eval_type"])
    )
    uptrain_template = uptrain_template.replace(
        "score_var", "'{}'".format(evals_compiled["score_type"])
    )
    uptrain_template = uptrain_template.replace(
        "explanation_var", "'{}'".format(evals_compiled["explanation_type"])
    )
    uptrain_template = uptrain_template.replace("threshold_var", "{}".format(threshold))
    uptrain_template = uptrain_template.replace("eval_model", "'{}'".format(eval_model))
    return uptrain_template
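# Hypothetical illustration (not part of the commit): for a row
# {'question': 'Q?', 'context': 'C.'} and the compiled CONTEXT_RELEVANCE eval,
# the returned string is the template body with, e.g.,
#   data = [{"question": 'Q?', "context": 'C.', "response": output}]
#   results = eval_llm.evaluate(data=data, checks=[Evals.CONTEXT_RELEVANCE])
# ready to be embedded as a promptfoo "python" assert.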
"""
generate_promptfoo_yaml_file(py_obj):
    Generates the yaml config file for promptfoo.
"""


def generate_promptfoo_yaml_file(py_obj):
    try:
        with open("promptfooconfig.yaml", "w") as f:
            yaml.dump(py_obj, f, sort_keys=False)
        logger.success("Successfully generated file: promptfooconfig.yaml")
    except Exception as e:
        logger.error(f"Unable to generate file promptfooconfig.yaml: {e}")


"""
generate_open_port():
    Checks whether the default promptfoo port is free and, if not, asks the
    OS for an open one.
"""


def generate_open_port():
    # connect_ex returns 0 when something is already listening on the port,
    # i.e. the default DEFAULT_PORT_PROMPTFOO is in use.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        port_res = sock.connect_ex(("localhost", DEFAULT_PORT_PROMPTFOO))
    if port_res == 0:
        logger.info("Default port " + str(DEFAULT_PORT_PROMPTFOO) + " already in use")
        # Bind to port 0 to let the OS pick a free port, then release it.
        with socket.socket() as sock:
            sock.bind(("", 0))
            open_port = sock.getsockname()[1]
        logger.info("Using port: " + str(open_port))
    else:
        open_port = DEFAULT_PORT_PROMPTFOO
    return open_port