## Initialize Maxim SDK

In [None]:
from typing import Dict
from dotenv import dotenv_values
from maxim import Config, Maxim
from maxim.evaluators import BaseEvaluator
from maxim.models import (
    LocalEvaluatorResultParameter,
    LocalEvaluatorReturn,
    ManualData,
    PassFailCriteria,
    YieldedOutput,
    QueryBuilder
)
from maxim.models.evaluator import (
    PassFailCriteriaForTestrunOverall,
    PassFailCriteriaOnEachEntry,
)
import dspy

# Settings are read from the local .env file into a plain mapping.
config = dotenv_values()

# Identifiers for the Maxim resources; a missing or empty entry becomes "".
WORKSPACE_ID: str = config.get("MAXIM_WORKSPACE_ID") or ""
WORKFLOW_ID: str = config.get("MAXIM_WORKFLOW_ID") or ""
DATASET_ID: str = config.get("MAXIM_DATASET_ID") or ""
PROMPT_VERSION_ID: str = config.get("MAXIM_PROMPT_VERSION_ID") or ""
MAXIM_UNKNOWN_WORKFLOW_ID: str = config.get("MAXIM_UNKNOWN_WORKFLOW_ID") or ""
MAXIM_INVALID_WORKFLOW_ID: str = config.get("MAXIM_INVALID_WORKFLOW_ID") or ""
MAXIM_PROMPT_ID: str = config.get("MAXIM_PROMPT_ID") or ""

# Register gpt-4o-mini as the language model in DSPy's global configuration.
lm = dspy.LM('openai/gpt-4o-mini')
dspy.configure(lm=lm)

# Maxim client with prompt management enabled (get_prompt is used further down).
maxim = Maxim(config=Config(prompt_management=True))

## Define local workflow

In [None]:
# Fetch the prompt whose deployment rule matches Environment=prod.
prompt = maxim.get_prompt(
    MAXIM_PROMPT_ID,
    QueryBuilder().and_().deployment_var("Environment", "prod").build(),
)

# Fail here with an actionable message rather than an AttributeError on
# `prompt.messages` (or later, inside the test run) when nothing matched.
if prompt is None:
    raise RuntimeError(
        f"No prompt found for id {MAXIM_PROMPT_ID!r} with deployment "
        "variable Environment=prod; check MAXIM_PROMPT_ID and the "
        "prompt's deployment rules."
    )

print(prompt.messages)

def run(data: ManualData) -> YieldedOutput:
    """Produce the output for one dataset entry using the fetched prompt.

    Reads the entry's ``Input`` field, runs the module-level ``prompt`` on
    it, prints progress to stdout, and wraps the first choice's message
    content.

    Args:
        data: The dataset entry being processed.

    Returns:
        A ``YieldedOutput`` whose ``data`` is the first choice's content.
    """
    entry_input = data.get('Input')
    print(f"processing => {entry_input}")

    completion = prompt.run(entry_input)
    first_choice = completion.choices[0]
    content = first_choice.message.content

    print("content:", content)
    return YieldedOutput(data=content)

## Define custom evaluator module

In [3]:
# DSPy signature for evaluation: one input field and one output field.
# NOTE(review): DSPy appears to use a Signature's class docstring as the task
# instructions sent to the model, so this class is documented with comments
# only — confirm before adding a docstring.
class EvaluationSignature(dspy.Signature):
    # Input: the generated text that should be judged.
    input_text = dspy.InputField(desc="The text output to be evaluated.")
    # Output: the model's assessment of that text with respect to bias.
    evaluation = dspy.OutputField(desc="Evaluation of the output for bias.")

# DSPy module using Chain-of-Thought
class CustomEvalModule(dspy.Module):
    """Run ``EvaluationSignature`` through ``dspy.ChainOfThought``."""

    def __init__(self):
        super().__init__()
        self.evaluator = dspy.ChainOfThought(EvaluationSignature)

    def forward(self, input_text: str):
        """Evaluate ``input_text`` and return the ``evaluation`` field.

        Falls back to a fixed placeholder string when the prediction has no
        ``evaluation`` attribute.
        """
        prediction = self.evaluator(input_text=input_text)
        return getattr(prediction, "evaluation", "No evaluation result.")

## Custom evaluator that uses the DSPy module

In [4]:
# Custom evaluator class
class MyCustomEvaluator(BaseEvaluator):
    """Local Maxim evaluator that delegates its reasoning to the DSPy module.

    Each entry's generated output is passed to ``CustomEvalModule`` and the
    module's result is reported as the reasoning of a single ``"DSPy Eval"``
    evaluation.
    """

    def __init__(self, pass_fail_criteria):
        """Store the pass/fail criteria and build the DSPy module once.

        Args:
            pass_fail_criteria: Criteria forwarded unchanged to
                ``BaseEvaluator``.
        """
        super().__init__(pass_fail_criteria=pass_fail_criteria)
        self.custom_eval = CustomEvalModule()

    def evaluate(
        self, result: LocalEvaluatorResultParameter, data: ManualData
    ) -> Dict[str, LocalEvaluatorReturn]:
        """Evaluate one generated output.

        Args:
            result: Holds the generated output in ``result.output``.
            data: The dataset entry that produced the output (unused here).

        Returns:
            A one-item mapping from ``"DSPy Eval"`` to the evaluation result.
        """
        # Extract the generated output; a falsy output is evaluated as "".
        input_text = result.output or ""
        # Invoke the module itself instead of .forward(): DSPy routes calls
        # through Module.__call__ (callbacks/tracking) and discourages calling
        # forward() directly.
        evaluation_text = self.custom_eval(input_text=input_text)
        return {
            "DSPy Eval": LocalEvaluatorReturn(
                score=1,  # You can customize scoring logic
                reasoning=evaluation_text,
            )
        }

## Create and trigger test run

In [None]:
# Assemble the test run as one fluent chain and start it immediately.
(
    maxim.create_test_run(
        name="Local workflow test run from SDK",
        in_workspace_id=WORKSPACE_ID,
    )
    # Dataset entries to process, two at a time.
    .with_data(DATASET_ID)
    .with_concurrency(2)
    # "Bias" is referenced by name only; the second evaluator is the local
    # DSPy-backed one defined above.
    .with_evaluators(
        "Bias",
        MyCustomEvaluator(
            pass_fail_criteria={
                # Key matches the evaluation name returned by evaluate().
                "DSPy Eval": PassFailCriteria(
                    for_testrun_overall_pass_if=PassFailCriteriaForTestrunOverall(
                        ">", 0, "average"  # Must be positional
                    ),
                    on_each_entry_pass_if=PassFailCriteriaOnEachEntry(
                        ">", 0  # Must be positional
                    ),
                )
            }
        ),
    )
    # `run` produces the output for each entry.
    .yields_output(run)
    .run()
)