In [1]:
import time
from rich.pretty import pprint
import logging

# Raise the "httpx" logger threshold so only warnings and above are emitted.
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)

In [2]:
# Root URL handed to the client as `base_url` when it is constructed.
BASE_URL = "http://localhost:8321"
def create_http_client(base_url=None):
    """Build a ``LlamaStackClient`` for the given server URL.

    Args:
        base_url: URL passed to the client as ``base_url``. When omitted
            (``None``), the module-level ``BASE_URL`` is read at call time,
            which is what the parameterless version of this helper did.

    Returns:
        A new ``LlamaStackClient`` instance.
    """
    # Kept as a local import so the dependency is only resolved when a client
    # is actually created.
    from llama_stack_client import LlamaStackClient

    target_url = BASE_URL if base_url is None else base_url
    return LlamaStackClient(base_url=target_url)

# Client instance that the remaining cells use for every API call.
client = create_http_client()

In [3]:
# Fetch the provider list and display it as the cell result.
registered_providers = client.providers.list()
registered_providers

[ProviderInfo(api='inference', config={'url': 'http://localhost:8080/v1', 'max_tokens': 4096.0, 'api_token': '********', 'tls_verify': True}, health={'status': 'OK'}, provider_id='vllm', provider_type='remote::vllm'),
 ProviderInfo(api='eval', config={'base_url': 'https://b31dc9b91cf1.ngrok-free.app/v1', 'timeout': 10800.0, 'max_concurrent_jobs': 5.0, 'tls_verify': True, 'kubeflow_config': {'pipelines_endpoint': 'https://ds-pipeline-dspa-model-namespace.apps.rosa.y1m4j9o2e1n6b9l.r6mx.p3.openshiftapps.com', 'namespace': 'model-namespace', 'experiment_name': 'trustyai-garak-scans', 'base_image': 'quay.io/spandraj/trustyai-garak-provider:latest'}}, health={'status': 'Not Implemented', 'message': 'Provider does not implement health check'}, provider_id='trustyai_garak', provider_type='remote::trustyai_garak'),
 ProviderInfo(api='files', config={'storage_dir': '/Users/spandraj/.llama/distributions/trustyai-garak/files', 'metadata_store': {'type': 'sqlite', 'db_path': '/Users/spandraj/.llama

In [None]:
user_defined_probe_benchmark_id = "custom"

# Settings passed through to the provider as benchmark metadata.
garak_benchmark_metadata = {
    "probes": ["latentinjection.LatentJailbreak"],
    "timeout": 60*15, # optional
    # "use_gpu": True, # optional, needs GPU image as KUBEFLOW_BASE_IMAGE
}

# The same identifier is used for both the stack-side and provider-side
# benchmark IDs.
client.benchmarks.register(
    benchmark_id=user_defined_probe_benchmark_id,
    provider_benchmark_id=user_defined_probe_benchmark_id,
    provider_id="trustyai_garak",
    dataset_id="garak", # placeholder
    scoring_functions=["garak_scoring"], # placeholder
    metadata=garak_benchmark_metadata,
)

In [None]:
# Model under evaluation together with its sampling limits.
eval_candidate = {
    "type": "model",
    "model": "vllm/qwen2",
    "sampling_params": {"max_tokens": 100},
}

# Submit the evaluation for the benchmark registered above; the returned
# job object is polled in the following cell.
job = client.alpha.eval.run_eval(
    benchmark_id=user_defined_probe_benchmark_id,
    benchmark_config={"eval_candidate": eval_candidate},
)

print(f"Starting job '{job}'")

Starting job 'Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='scheduled', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})'


In [None]:
def get_job_status(job_id, benchmark_id):
    """Return the current status record of an eval job.

    Thin wrapper around ``client.alpha.eval.jobs.status``: both identifiers
    are forwarded as keyword arguments and the response is returned as-is.
    """
    eval_jobs = client.alpha.eval.jobs
    status_record = eval_jobs.status(benchmark_id=benchmark_id, job_id=job_id)
    return status_record

# Poll the job until it reaches a terminal status. The wait is bounded so a
# stuck job cannot hang the kernel (and "Run All") indefinitely.
TERMINAL_JOB_STATUSES = {"failed", "completed", "cancelled"}
POLL_INTERVAL_SECONDS = 20
# NOTE(review): 3 hours mirrors the 10800 s timeout visible in the eval
# provider's config in the provider listing output; adjust if jobs may
# legitimately run longer.
MAX_WAIT_SECONDS = 3 * 60 * 60

# monotonic() is immune to wall-clock adjustments while waiting.
poll_deadline = time.monotonic() + MAX_WAIT_SECONDS

while True:
    # `job` is rebound on purpose: later cells read job.job_id and
    # job.metadata from the final status record.
    job = get_job_status(job_id=job.job_id, benchmark_id=user_defined_probe_benchmark_id)
    print(job)

    if job.status in TERMINAL_JOB_STATUSES:
        print("="*100)
        print(f"Job ended with status: {job.status}")
        break

    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f"Job {job.job_id} is still '{job.status}' after {MAX_WAIT_SECONDS} seconds"
        )

    time.sleep(POLL_INTERVAL_SECONDS)

Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='scheduled', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})
Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='in_progress', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})
Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='in_progress', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})
Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='in_progress', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})
Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-889a5eef9c9b', status='in_progress', metadata={'created_at': '2025-08-21T04:18:57+00:00', 'kfp_run_id': '10870647-482b-4cc2-9948-ed1709932e68'})
Job(job_id='garak-job-ea91c2e9-52c6-4f78-a672-8

In [None]:
# Fetch the result of the job tracked by `job` and keep its scores for the
# cells below.
job_result = client.alpha.eval.jobs.retrieve(
    benchmark_id=user_defined_probe_benchmark_id,
    job_id=job.job_id,
)
scores = job_result.scores

In [8]:
# Keep only the `aggregated_results` of each entry in `scores`, keyed the
# same way as `scores`.
aggregated_scores = {}
for score_key, score_entry in scores.items():
    aggregated_scores[score_key] = score_entry.aggregated_results

pprint(aggregated_scores)

In [9]:
# Metadata carried by the last status record obtained for the job.
job_metadata = job.metadata
pprint(job_metadata)

In [10]:
# The 'scan.log' metadata entry is handed to the files API as the lookup key
# (presumably a file ID — confirm against the provider).
scan_log_ref = job.metadata['scan.log']
scan_log_info = client.files.retrieve(scan_log_ref)
pprint(scan_log_info)

In [11]:
# last 10 lines of scan.log
# splitlines() is used instead of split('\n'): when the log ends with a
# newline, split('\n') yields an empty final element, so the slice would
# contain only 9 real lines plus ''.
client.files.content(job.metadata['scan.log']).splitlines()[-10:]

['2025-08-21 04:26:51,796  DEBUG  response_closed.started',
 '2025-08-21 04:26:51,796  DEBUG  response_closed.complete',
 '2025-08-21 04:26:51,796  DEBUG  HTTP Response: POST https://b31dc9b91cf1.ngrok-free.app/v1/openai/v1/chat/completions "200 OK" Headers({\'content-length\': \'888\', \'content-type\': \'application/json\', \'date\': \'Thu, 21 Aug 2025 04:26:47 GMT\', \'ngrok-agent-ips\': \'2601:19b:780:78f0:89c0:74b8:90e0:8b57\', \'server\': \'uvicorn\', \'x-trace-id\': \'973a82d0a4de3e4ec62e5383f31e9801\'})',
 '2025-08-21 04:26:51,796  DEBUG  request_id: None',
 '2025-08-21 04:26:51,815  DEBUG  probe return: <garak.probes.snowball.GraphConnectivity object at 0x7fd6ecc48f50> with 100 attempts',
 '2025-08-21 04:26:51,815  DEBUG  harness: run detector garak.detectors.snowball.DontStartYes',
 '2025-08-21 04:26:51,820  DEBUG  harness: probe list iteration completed',
 '2025-08-21 04:26:51,820  INFO  run complete, ending',
 '2025-08-21 04:26:51,860  INFO  garak run complete in 422.53s',
