In [1]:
import time
from rich.pretty import pprint
import logging

# Raise the "httpx" logger threshold to WARNING so that its lower-level
# messages do not appear in the notebook output.
logging.getLogger("httpx").setLevel(logging.WARNING)

In [None]:
# Default address used when no explicit base URL is passed to the factory below.
BASE_URL = "http://localhost:8321"


def create_http_client(base_url=None):
    """Build a ``LlamaStackClient`` pointed at a Llama Stack server.

    Args:
        base_url: Base URL handed to ``LlamaStackClient``. When omitted (or
            ``None``), the notebook-level ``BASE_URL`` is looked up at call
            time, so existing no-argument calls behave exactly as before.

    Returns:
        A ``LlamaStackClient`` constructed with the chosen base URL.
    """
    # Imported inside the function, so the package is only required once a
    # client is actually requested.
    from llama_stack_client import LlamaStackClient

    return LlamaStackClient(base_url=BASE_URL if base_url is None else base_url)


# Shared client used by every cell that follows.
client = create_http_client()

In [3]:
# List the providers known to the stack; leaving the call as the cell's last
# expression lets the notebook display the returned list.
# NOTE(review): the recorded output includes endpoint URLs and local
# filesystem paths — consider clearing it before sharing the notebook.
client.providers.list()

[ProviderInfo(api='inference', config={'url': 'http://localhost:8080/v1', 'max_tokens': 4096.0, 'api_token': '********', 'tls_verify': True}, health={'status': 'OK'}, provider_id='vllm', provider_type='remote::vllm'),
 ProviderInfo(api='eval', config={'base_url': 'https://ea5f9b2f63fd.ngrok-free.app/v1', 'timeout': 10800.0, 'max_concurrent_jobs': 5.0, 'tls_verify': True, 'kubeflow_config': {'pipelines_endpoint': 'https://ds-pipeline-dspa-model-namespace.apps.rosa.y1m4j9o2e1n6b9l.r6mx.p3.openshiftapps.com', 'namespace': 'model-namespace', 'experiment_name': 'trustyai-garak-scans', 'base_image': 'quay.io/rh-ee-spandraj/trustyai-garak-provider-dsp:cpu'}}, health={'status': 'Not Implemented', 'message': 'Provider does not implement health check'}, provider_id='trustyai_garak', provider_type='remote::trustyai_garak'),
 ProviderInfo(api='files', config={'storage_dir': '/Users/spandraj/.llama/distributions/trustyai-garak/files', 'metadata_store': {'type': 'sqlite', 'db_path': '/Users/spandraj

In [4]:
# Benchmark identifier; the evaluation cells further down refer to it as well.
user_defined_probe_benchmark_id = "test-benchmark"

# Register the benchmark with the trustyai_garak provider. The probe selection
# and the timeout are passed through the metadata mapping.
client.benchmarks.register(
    provider_id="trustyai_garak",
    benchmark_id=user_defined_probe_benchmark_id,
    provider_benchmark_id=user_defined_probe_benchmark_id,
    dataset_id="garak",  # placeholder
    scoring_functions=["garak_scoring"],  # placeholder
    metadata=dict(
        probes=["latentinjection.LatentJailbreak", "grandma"],
        timeout=30 * 60,  # optional
        # "use_gpu": True,  # optional, needs GPU image as KUBEFLOW_BASE_IMAGE
    ),
)

In [5]:
# Start an evaluation of the registered benchmark against the candidate model.
# The returned job object is kept so later cells can use its job_id.
job = client.alpha.eval.run_eval(
    benchmark_id=user_defined_probe_benchmark_id,
    benchmark_config=dict(
        eval_candidate=dict(
            type="model",
            model="vllm/qwen2",
            sampling_params=dict(max_tokens=100),
        ),
    ),
)

# Echo the full job object (id, status, metadata) for reference.
print(f"Starting job '{job}'")

Starting job 'Job(job_id='garak-job-556d62b9-1226-4d3f-ae38-8009254ced5a', status='scheduled', metadata={'created_at': '2025-10-09T01:09:02+00:00', 'kfp_run_id': '5f1afd33-17d4-425e-ae91-d692be312f88'})'


In [None]:
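# Alternative to the progress helper used in the next cell: poll the job
# status manually until it reaches a terminal state. Left disabled.
# NOTE: this snippet calls `client.eval`, whereas the live cells use
# `client.alpha.eval` — adjust the namespace before enabling it.
# def get_job_status(job_id, benchmark_id):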
#     return client.eval.jobs.status(job_id=job_id, benchmark_id=benchmark_id)

# while True:
#     job = get_job_status(job_id=job.job_id, benchmark_id=user_defined_probe_benchmark_id)
#     print(job)

#     if job.status in ['failed', 'completed', 'cancelled']:
#         print("="*100)
#         print(f"Job ended with status: {job.status}")
#         break

#     time.sleep(20)

In [7]:
from llama_stack_provider_trustyai_garak.utils import wait_for_completion_with_progress

# Wait for the evaluation job started above and keep what the helper returns;
# later cells read `final_status.metadata`. The recorded output shows a
# progress bar followed by the job's final status.
# NOTE(review): poll_interval is presumably in seconds — confirm against the helper.
final_status = wait_for_completion_with_progress(
    client,
    job.job_id,
    user_defined_probe_benchmark_id,
    poll_interval=5,
)

Garak Scan Postprocessing: 100.0%|██████████████████████████████████████████████████████████| , 📊 Parsing results and uploading reports... [00:08:59]

Job ended with status: completed ✅





In [8]:
# Fetch the result of the evaluation job and keep a handle on its scores.
job_result = client.alpha.eval.jobs.retrieve(
    benchmark_id=user_defined_probe_benchmark_id,
    job_id=job.job_id,
)
scores = job_result.scores

In [9]:
# Reduce every entry of `scores` to its aggregated results, keeping the same
# keys, then pretty-print the resulting mapping.
aggregated_scores = dict(
    (score_name, score.aggregated_results) for score_name, score in scores.items()
)
pprint(aggregated_scores)

In [12]:
# Pretty-print the object returned by the wait helper; its `metadata` mapping
# is what the following cells index into.
pprint(final_status)

In [13]:
# Look up the file referenced by the 'scan.log' metadata entry through the
# files API and pretty-print what is returned.
# NOTE(review): the 'scan.log' value is presumably a file identifier — confirm.
pprint(client.files.retrieve(final_status.metadata['scan.log']))

In [14]:
# Last 10 lines of scan.log: download the file content, split it on '\n' and
# keep the final ten elements; the bare expression is shown as the cell output.
# NOTE(review): assumes the content comes back as a string; if it ends with a
# newline, the last element is an empty string. The recorded output also
# contains request headers with network addresses — review before sharing.
client.files.content(final_status.metadata['scan.log']).split('\n')[-10:]

['2025-10-09 01:16:52,059  DEBUG  response_closed.complete',
 '2025-10-09 01:16:52,059  DEBUG  HTTP Response: POST https://ea5f9b2f63fd.ngrok-free.app/v1/openai/v1/chat/completions "200 OK" Headers({\'content-length\': \'1413\', \'content-type\': \'application/json\', \'date\': \'Thu, 09 Oct 2025 01:16:50 GMT\', \'ngrok-agent-ips\': \'2601:19b:780:6680:a8c2:559e:f5c6:f4e3\', \'server\': \'uvicorn\', \'x-trace-id\': \'9e3adc51b17e40cc6b672eb25c03e1f2\'})',
 '2025-10-09 01:16:52,059  DEBUG  request_id: None',
 '2025-10-09 01:16:52,088  DEBUG  probe return: <garak.probes.latentinjection.LatentJailbreak object at 0x7fe2db96f230> with 256 attempts',
 '2025-10-09 01:16:52,089  DEBUG  harness: run detector garak.detectors.unsafe_content.ToxicCommentModel',
 '2025-10-09 01:17:05,892  DEBUG  harness: run detector garak.detectors.base.TriggerListDetector',
 '2025-10-09 01:17:05,903  DEBUG  harness: probe list iteration completed',
 '2025-10-09 01:17:05,904  INFO  run complete, ending',
 '2025-10