# Monitor GenAI app in production

## Set up the environment

In [0]:
%pip install -qq --upgrade "mlflow[databricks]>=3.5.0"
# Restart the Python process so the cells below import the mlflow version that
# was just installed rather than one already loaded in this session.
dbutils.library.restartPython()

In [0]:
import mlflow

from mlflow.genai.judges import make_judge
from mlflow.genai.scorers import Safety, ScorerSamplingConfig, list_scorers, RelevanceToQuery

from databricks.agents.scorers import backfill_scorers, BackfillScorerConfig

In [0]:
# Send tracking calls to the Databricks workspace and use the "databricks-uc"
# registry URI for registered models.
mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")

# Workspace path of the experiment that holds the app's traces.
# Plain string literal: there is nothing to interpolate, so no f-prefix.
# NOTE(review): this path points into one specific user's workspace folder;
# consider making it configurable before sharing the notebook.
MLFLOW_EXPERIMENT_NAME = "/Users/mswangws@gmail.com/demo_kpchat_mock"

# Keep the returned experiment object; its experiment_id is needed when the
# custom judge is registered further down.
experiment = mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

## Register judges

In [0]:
# Prompt for the out-of-scope judge. The verdict is True when the question is
# about drug pricing / delivery dates, or when an off-topic question receives
# the canned "I don't know..." refusal; any other answer to an off-topic
# question yields False. {{ inputs }} and {{ outputs }} are template
# placeholders (presumably filled in by MLflow per evaluated trace - confirm).
# The unusual indentation is intentional: it keeps the prompt text exactly as
# it was originally written.
BEYOND_SCOPE_INSTRUCTIONS = """
        If the question is about drug pricing or delivery dates, return True; 
        if the question is NOT about drug pricing or delivery dates and the AI agent responds with "I don't know. I only know questions about drug pricing or delivery dates", return True;
        if the question is NOT about drug pricing or delivery dates and the AI agent responds with anything else, return False.\n\n
        User's messages: {{ inputs }}\n
        Agent's responses: {{ outputs }}
        """

# Build the custom judge; it reports a boolean feedback value.
judge_beyond_scope = make_judge(
    name="judge_beyond_scope",
    instructions=BEYOND_SCOPE_INSTRUCTIONS,
    feedback_value_type=bool,
)

In [0]:
# Register the custom judge on the experiment created above.
# Scorer lifecycle calls (register/start) hand back a new scorer instance
# instead of mutating the receiver, so rebind the name to the registered
# instance - the same pattern this notebook uses for the built-in judges and
# for the start() calls. Previously the return value was dropped, leaving
# judge_beyond_scope pointing at the unregistered object.
judge_beyond_scope = judge_beyond_scope.register(
    experiment_id=experiment.experiment_id,
    name="judge_beyond_scope",
)

In [0]:
# Register the built-in Safety and RelevanceToQuery scorers under
# notebook-specific names, keeping the instances returned by register() so
# they can be started later.
safety_scorer = Safety()
judge_safety = safety_scorer.register(name="judge_safety")

relevance_scorer = RelevanceToQuery()
judge_relevance = relevance_scorer.register(name="judge_relevance")

## Start monitoring

In [0]:
# Sampling rate applied to all three judges (1 presumably means every trace is
# scored - confirm against the ScorerSamplingConfig documentation).
MONITOR_SAMPLE_RATE = 1

# start() returns the started scorer, so each name is rebound to the result.
judge_beyond_scope = judge_beyond_scope.start(
    sampling_config=ScorerSamplingConfig(sample_rate=MONITOR_SAMPLE_RATE)
)
judge_safety = judge_safety.start(
    sampling_config=ScorerSamplingConfig(sample_rate=MONITOR_SAMPLE_RATE)
)
judge_relevance = judge_relevance.start(
    sampling_config=ScorerSamplingConfig(sample_rate=MONITOR_SAMPLE_RATE)
)

## Manage judges

In [0]:
# List every registered scorer and report whether it is currently sampling.
all_scorers = list_scorers()
for scorer in all_scorers:
    # sample_rate is optional on the Scorer API and can be None when a scorer
    # has no sampling configuration; comparing None with 0 would raise a
    # TypeError, so treat a missing rate as 0 (i.e. not sampling).
    current_rate = scorer.sample_rate or 0
    state = "active" if current_rate > 0 else "stopped"
    print(f"{scorer.name} is {state}")

## Metric backfill

In [0]:
from datetime import datetime, timedelta

# Start of the backfill window: one day before now.
# NOTE(review): this cell was previously described as backfilling "last
# week's data" while the code uses days=1 - confirm which window is intended.
time_range = datetime.now() - timedelta(days=1)

# One backfill config per judge, each with sample_rate=1.
backfill_configs = [
    BackfillScorerConfig(scorer=judge, sample_rate=1)
    for judge in (judge_beyond_scope, judge_safety, judge_relevance)
]

# Submit the backfill; the returned value is kept as job_id.
job_id = backfill_scorers(scorers=backfill_configs, start_time=time_range)