In [None]:
!pip install vijil python-dotenv

In [None]:
from dotenv import load_dotenv

# Read the .env file before the remaining imports run. The path is relative,
# so the notebook must be started from the directory that contains rag-agent/.
# NOTE(review): presumably this file supplies VIJIL_API_KEY_PROD, which a later
# cell reads through os.getenv — confirm.
load_dotenv(dotenv_path='rag-agent/.env')

import os
from vijil import Vijil

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Fetch the production API key once and fail early with a clear message if it
# is missing from the environment.
vijil_api_key_prod = os.getenv('VIJIL_API_KEY_PROD')
assert vijil_api_key_prod is not None, "VIJIL_API_KEY_PROD is not set"

# Build the client against the production Evaluate endpoint.
evaluate_base_url_prod = "https://evaluate-api.vijil.ai/v1"
client = Vijil(api_key=vijil_api_key_prod, base_url=evaluate_base_url_prod)

# Show the result of listing agents as this cell's output.
client.agents.list()


First, we need to register an API key for the agent under test on the platform. This key entry also sets the rate limit used when querying the agent. If the agent under test itself requires an API key, supply it in the `api_key` field below instead of the placeholder value.

In [15]:
# Endpoint of the agent under test.
agent_url = "https://85a847ca55337a9027743abd6e6346593ab15bb5-8000.dstack-pha-prod7.phala.network/v1"

# If the agent itself needs an API key, provide it through the
# PHALA_AGENT_API_KEY environment variable instead of pasting the secret into
# the notebook. Agents that need no key keep using the dummy "placeholder" value.
agent_api_key = os.getenv("PHALA_AGENT_API_KEY", "placeholder")

# Register a rate-limited key entry for the agent on the platform.
phala_webinar_api_key_10rpm = client.api_keys.create(
    name="phala_webinar_api_key_10rpm",
    model_hub="custom",
    # 10 requests per interval of 60 — presumably seconds, matching the
    # "10rpm" in the key name; confirm against the Vijil API docs.
    rate_limit_per_interval=10,
    rate_limit_interval=60,
    api_key=agent_api_key,
    url=agent_url
)

print(phala_webinar_api_key_10rpm)

{'id': '540c616e-c9e1-446e-837a-5ab9221aa72b', 'name': 'phala_webinar_api_key_10rpm', 'hub': 'custom', 'rate_limit_per_interval': 10, 'rate_limit_interval': 60, 'display_value': 'pl*******er', 'hub_config': None, 'user_id': 'ebfed21d-5017-47bc-bff8-c45675a0650e', 'team_id': 'ef6bdaec-f563-487c-b036-674c912da053', 'status': 'active'}


After creating the API key (a step that also tests the agent's responsiveness), we create an agent entity on the platform. Use the API key name returned by the key-creation call and fill in the remaining fields.

In [16]:
import uuid

# Register the agent on the platform under a name with a random UUID suffix,
# linked by name to the API key entry created earlier in the notebook.
phala_agent_name = f"phala-agent-{uuid.uuid4()}"
client.agents.create(
    model_name="vijil-docs-agent",
    api_key_name=phala_webinar_api_key_10rpm['name'],
    hub="custom",
    agent_name=phala_agent_name,
)

{'agent_name': 'phala-agent-c1e7c20d-3068-4ad8-8b10-e6fa9f18c509',
 'model_name': 'vijil-docs-agent',
 'agent_url': '',
 'hub': 'custom',
 'team_id': 'ef6bdaec-f563-487c-b036-674c912da053',
 'created_by': 'ebfed21d-5017-47bc-bff8-c45675a0650e',
 'api_key_id': '540c616e-c9e1-446e-837a-5ab9221aa72b',
 'agent_system_prompt': None,
 'id': '559a9013-20ac-49d8-934a-01d69b9c35c0',
 'status': 'active',
 'created_at': 1762135027,
 'hub_config': None}

Now we can create an evaluation. Fill in all the required fields: give the evaluation a unique name (here, a random UUID suffix), and set the test suite (`harnesses`), the API key name, and the model URL.

In [None]:
# Collect the evaluation settings first, then hand them to the client as
# keyword arguments.
# NOTE(review): the result name `eval` shadows Python's builtin eval(); it is
# left unchanged here in case other cells refer to it.
# NOTE(review): the TypeError recorded under this cell reports a missing
# 'model_hub', which is passed below — the output looks stale; re-run to confirm.
evaluation_settings = {
    "model_hub": "custom",
    "model_name": "vijil-docs-agent",
    "name": f"phala-eval-{uuid.uuid4()}",
    "api_key_name": phala_webinar_api_key_10rpm['name'],
    "harnesses": ["trust_score"],
    "model_url": agent_url,
}
eval = client.evaluations.create(**evaluation_settings)

TypeError: Evaluations.create() missing 1 required positional argument: 'model_hub'

You can query the status of the evaluation as follows:

In [25]:
# Look up the status of one specific evaluation, pinned here by its ID.
# To follow the evaluation created above instead, use: eval_id = eval['id']
eval_id = "8393553b-8d62-4bb5-a5de-a62130f11e38"
eval_status = client.evaluations.get_status(evaluation_id=eval_id)
eval_status

{'id': '8393553b-8d62-4bb5-a5de-a62130f11e38',
 'name': 'phala-eval-8777fe26-cf33-4f77-a8c8-d25ad9c69d94',
 'tags': ['vijil_harness'],
 'status': 'IN_PROGRESS',
 'cause': None,
 'total_test_count': 2066,
 'completed_test_count': 54,
 'error_test_count': 0,
 'total_response_count': 54,
 'completed_response_count': 1,
 'error_response_count': 0,
 'total_generation_time': '437.000000',
 'average_generation_time': '2.3888888888888889',
 'score': None,
 'status_counts': {'probes': {'CREATED': 186, 'SCORED': 1},
  'tests': {'CREATED': 2010, 'GENERATED': 54, 'GENERATING': 2},
  'responses': {'SKIP': 1, 'GENERATED': 52, 'COMPLETED': 1}},
 'hub': 'custom',
 'model': 'vijil-docs-agent',
 'url': 'https://85a847ca55337a9027743abd6e6346593ab15bb5-8000.dstack-pha-prod7.phala.network/v1',
 'created_at': 1762135719,
 'created_by': 'ebfed21d-5017-47bc-bff8-c45675a0650e',
 'completed_at': None,
 'team_id': 'ef6bdaec-f563-487c-b036-674c912da053',
 'restart_count': 0,
 'metadata': None,
 'completion_token

In [26]:
# Switch to a different evaluation ID and look up its status. This rebinds
# eval_id, so cells that run afterwards operate on this evaluation.
eval_id = "c509cbbc-3079-49d7-970a-375fe6c67477"
completed_eval_status = client.evaluations.get_status(evaluation_id=eval_id)
completed_eval_status

{'id': 'c509cbbc-3079-49d7-970a-375fe6c67477',
 'name': 'test',
 'tags': ['vijil_harness', 'small'],
 'status': 'COMPLETED',
 'cause': None,
 'total_test_count': 2137,
 'completed_test_count': 2059,
 'error_test_count': 0,
 'total_response_count': 2059,
 'completed_response_count': 1648,
 'error_response_count': 17,
 'total_generation_time': '17575.000000',
 'average_generation_time': '4.1432734337056824',
 'score': 0.8499620061858941,
 'status_counts': {'probes': {'ERROR': 12, 'COMPLETED': 175},
  'tests': {'CREATED': 73, 'GENERATED': 2059, 'GENERATING': 5},
  'responses': {'SKIP': 122,
   'GENERATED': 272,
   'ERROR': 17,
   'COMPLETED': 1648}},
 'hub': 'custom',
 'model': 'vijil-docs-agent',
 'url': 'https://85a847ca55337a9027743abd6e6346593ab15bb5-8000.dstack-pha-prod7.phala.network/v1',
 'created_at': 1761895139,
 'created_by': 'ebfed21d-5017-47bc-bff8-c45675a0650e',
 'completed_at': 1761912728,
 'team_id': 'ef6bdaec-f563-487c-b036-674c912da053',
 'restart_count': 0,
 'metadata': 

We can now download the trust report using the following commands.

In [29]:
# Get the report handle for the evaluation currently held in eval_id, then ask
# for a PDF saved as analysis_report.pdf.
# wait_till_completion=True presumably blocks until the report is ready — confirm.
# NOTE(review): the recorded run of this cell ended in a ValueError while
# checking the report status, after a "too many missing probes" scoring message.
analysis_report = client.evaluations.report(evaluation_id=eval_id)
analysis_report.generate(
    format="pdf",
    save_file="analysis_report.pdf",
    wait_till_completion=True,
)

could not convert string to float: 'ERROR There are too many missing probes to score this scenario.'.....


ValueError: An error occurred while checking the report status: Incoming markup is of an invalid type: None. Markup must be a string, a bytestring, or an open filehandle.