# Summary Evaluators

### Setup

In [None]:
# Option 1: set the API keys and LangSmith settings inline.
# The two key values are left empty here (presumably placeholders to fill in).
import os

os.environ.update(
    {
        "OPENAI_API_KEY": "",
        "LANGCHAIN_API_KEY": "",
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "langsmith-academy",
    }
)

In [None]:
# Or you can use a .env file
# The file is looked up at the relative path "../../.env" (two directories up;
# presumably relative to the notebook's working directory — confirm).
# NOTE(review): override=True should let values from the file replace
# variables that are already set in the environment, such as the empty
# placeholders assigned in the inline cell — confirm against python-dotenv docs.
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

### Summary Evaluator

In [None]:
from typing import List
from langsmith.schemas import Example, Run
from langsmith.evaluation import evaluate

def f1_score_summary_evaluator(runs: List[Run], examples: List[Example]) -> dict:
    """Compute a single F1 score across all runs of an experiment.

    Runs and examples are paired positionally. For each pair, the reference
    is read from ``example.outputs["answer"]`` and the prediction from
    ``run.outputs["prediction"]``, then classified by truthiness:

    * true positive:  the reference is truthy and the prediction equals it
    * false positive: the prediction is truthy and the reference is falsy
    * false negative: the prediction is falsy and the reference is truthy

    Any other pair (both falsy, or both truthy but unequal) is not counted.

    A run or example whose ``outputs`` is ``None`` is treated as carrying no
    value (falsy) instead of raising ``TypeError``, so one run without outputs
    does not discard the score for the whole experiment. A present ``outputs``
    dict that lacks the expected key still raises ``KeyError``.

    Args:
        runs: The runs to score; one per example, in the same order.
        examples: The dataset examples holding the reference answers.

    Returns:
        ``{"key": "f1_score", "score": <float>}``. The score is ``0.0`` when
        there are no true positives, which also covers empty input.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    for run, example in zip(runs, examples):
        # Matches the output format of your dataset
        example_outputs = example.outputs
        reference = None if example_outputs is None else example_outputs["answer"]
        # Matches the output dict in `predict` function below
        run_outputs = run.outputs
        prediction = None if run_outputs is None else run_outputs["prediction"]
        if reference and prediction == reference:
            true_positives += 1
        elif prediction and not reference:
            false_positives += 1
        elif not prediction and reference:
            false_negatives += 1

    # With zero true positives, precision and recall are both 0 (or undefined),
    # so return early; this also keeps the divisions below away from zero.
    if true_positives == 0:
        return {"key": "f1_score", "score": 0.0}

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    f1_score = 2 * (precision * recall) / (precision + recall)
    return {"key": "f1_score", "score": f1_score}

def predict(inputs: dict):
    """Stand-in classifier: ignore ``inputs`` and always predict ``True``.

    The returned dict uses the ``"prediction"`` key.
    """
    result = dict(prediction=True)
    return result

# Run the experiment. `predict` is the target being evaluated, and the F1
# function is registered under `summary_evaluators`, so (per its signature) it
# receives the full lists of runs and examples rather than a single pair.
evaluate(
    predict, # Your classifier
    # Presumably a placeholder: replace with the name of an existing dataset.
    data="<dataset_name>",
    summary_evaluators=[f1_score_summary_evaluator],
)