In [1]:
import time
from rich.pretty import pprint
import logging

# Raise the "httpx" logger's threshold to WARNING so records below that level
# are dropped (presumably to keep per-request HTTP chatter out of cell output).
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)

In [2]:
# Address of the Llama Stack server every client call in this notebook targets.
BASE_URL = "http://localhost:8321"


def create_http_client():
    """Build and return a ``LlamaStackClient`` bound to ``BASE_URL``.

    The client package is imported inside the function, so it is only
    loaded when a client is actually created.
    """
    import llama_stack_client

    stack_client = llama_stack_client.LlamaStackClient(base_url=BASE_URL)
    return stack_client


# Notebook-wide client instance used by the cells below.
client = create_http_client()

In [3]:
# List the providers registered with the stack. Kept as the cell's final bare
# expression so the notebook displays the returned list of ProviderInfo objects.
client.providers.list()

[ProviderInfo(api='inference', config={'url': 'http://localhost:8080/v1', 'max_tokens': 4096.0, 'api_token': '********', 'tls_verify': True}, health={'status': 'Not Implemented', 'message': 'Provider does not implement health check'}, provider_id='vllm', provider_type='remote::vllm'),
 ProviderInfo(api='eval', config={'llama_stack_url': 'https://8e58562ad6e3.ngrok-free.app', 'tls_verify': True, 'kubeflow_config': {'results_s3_prefix': 's3://garak-results/scans', 's3_credentials_secret_name': '********', 'pipelines_endpoint': 'https://ds-pipeline-dspa-model-namespace.apps.rosa.y1m4j9o2e1n6b9l.r6mx.p3.openshiftapps.com', 'namespace': 'model-namespace', 'base_image': 'quay.io/rh-ee-spandraj/trustyai-lls-garak-provider-dsp:latest', 'pipelines_api_token': '********'}}, health={'status': 'Not Implemented', 'message': 'Provider does not implement health check'}, provider_id='trustyai_garak_remote', provider_type='remote::trustyai_garak'),
 ProviderInfo(api='files', config={'storage_dir': '/Us

In [4]:
# Identifier shared by the registration below and the eval run / status calls
# in later cells.
user_defined_probe_benchmark_id = "custom"

# Scan settings passed through the benchmark's metadata.
probe_metadata = {
    "probes": ["latentinjection.LatentJailbreak"],
    "timeout": 60*15, # optional (presumably seconds -- confirm with the provider)
    # "use_gpu": True, # optional, needs GPU image as KUBEFLOW_BASE_IMAGE
}

# Register the benchmark against the remote garak eval provider. The same id
# is used for both the stack-level and the provider-level benchmark id.
client.benchmarks.register(
    benchmark_id=user_defined_probe_benchmark_id,
    provider_benchmark_id=user_defined_probe_benchmark_id,
    provider_id="trustyai_garak_remote",
    dataset_id="garak", # placeholder
    scoring_functions=["garak_scoring"], # placeholder
    metadata=probe_metadata,
)

In [5]:
# Candidate to evaluate: the "vllm/qwen2" model, with generation capped at
# 100 tokens per response via sampling_params.
eval_candidate = {
    "type": "model",
    "model": "vllm/qwen2",
    "sampling_params": {"max_tokens": 100},
}

# Submit the eval run for the registered benchmark; the returned object is
# kept in `job` and used by the polling and result cells that follow.
job = client.alpha.eval.run_eval(
    benchmark_id=user_defined_probe_benchmark_id,
    benchmark_config={"eval_candidate": eval_candidate},
)

print(f"Starting job '{job}'")

Starting job 'Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='scheduled', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})'


In [6]:
def get_job_status(job_id, benchmark_id):
    """Return the current status record for an eval job.

    Thin wrapper around ``client.alpha.eval.jobs.status`` that uses the
    notebook-level ``client``.

    Args:
        job_id: Identifier of the job to look up.
        benchmark_id: Identifier of the benchmark the job was started for.

    Returns:
        Whatever ``client.alpha.eval.jobs.status`` returns for that job.
    """
    status_query = {"job_id": job_id, "benchmark_id": benchmark_id}
    return client.alpha.eval.jobs.status(**status_query)

# Poll the eval job until it reaches a terminal state, printing each status
# snapshot. Polling is bounded by a deadline so that a job which never leaves
# a non-terminal state (e.g. 'scheduled' or 'in_progress') cannot hang the
# notebook indefinitely.
POLL_INTERVAL_SECONDS = 20          # pause between status checks
MAX_WAIT_SECONDS = 60 * 60          # total polling budget; tune to the expected scan length
TERMINAL_STATUSES = ('failed', 'completed', 'cancelled')

# Monotonic clock: unaffected by wall-clock adjustments while we wait.
poll_deadline = time.monotonic() + MAX_WAIT_SECONDS

while True:
    # `job` is rebound to the latest status record on every iteration; later
    # cells read `job.job_id` and `job.metadata` from this final value.
    job = get_job_status(job_id=job.job_id, benchmark_id=user_defined_probe_benchmark_id)
    print(job)

    if job.status in TERMINAL_STATUSES:
        print("="*100)
        print(f"Job ended with status: {job.status}")
        break

    # Budget spent and still not terminal: fail loudly instead of looping
    # forever. `job` still holds the last status, so re-running this cell
    # resumes polling the same job with a fresh budget.
    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f"Job {job.job_id} is still '{job.status}' after "
            f"{MAX_WAIT_SECONDS}s of polling"
        )

    time.sleep(POLL_INTERVAL_SECONDS)

Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='scheduled', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})
Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='in_progress', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})
Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='in_progress', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})
Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='in_progress', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})
Job(job_id='garak-job-96653442-c398-4c4f-a68e-c131a6d04629', status='in_progress', metadata={'created_at': '2025-12-02T05:18:35+00:00', 'kfp_run_id': '0b09986c-afdc-43a2-93e1-bd0c8ab45ab0'})
Job(job_id='garak-job-96653442-c398-4c4f-a68e-c

In [7]:
# Fetch the result object for the job and keep its `scores` attribute for
# inspection in the following cell.
job_result = client.alpha.eval.jobs.retrieve(
    benchmark_id=user_defined_probe_benchmark_id,
    job_id=job.job_id,
)
scores = job_result.scores

In [8]:
# Reduce each entry of `scores` to just its `aggregated_results`, keyed by the
# same key it has in `scores`, then pretty-print the resulting mapping.
aggregated_scores = dict(
    (score_name, score_result.aggregated_results)
    for score_name, score_result in scores.items()
)
pprint(aggregated_scores)

In [9]:
# Pretty-print the metadata attached to `job` (the most recent status record
# assigned to that name).
pprint(job.metadata)

In [10]:
# The job metadata's 'scan.log' entry is what the files API is queried with
# (presumably a file identifier -- confirm against the provider).
scan_log_ref = job.metadata['scan.log']
pprint(client.files.retrieve(scan_log_ref))

In [11]:
# last 10 lines of scan.log
# Download the log content, split it on newlines, and leave the final ten
# entries as the cell's last expression so the notebook displays them.
scan_log_text = client.files.content(job.metadata['scan.log'])
scan_log_lines = scan_log_text.split('\n')
scan_log_lines[-10:]

['2025-12-02 05:26:34,156  DEBUG  HTTP Response: POST https://8e58562ad6e3.ngrok-free.app/v1/chat/completions "200 OK" Headers({\'content-length\': \'699\', \'content-type\': \'application/json\', \'date\': \'Tue, 02 Dec 2025 05:26:32 GMT\', \'ngrok-agent-ips\': \'73.38.7.72\', \'server\': \'uvicorn\', \'x-trace-id\': \'342cd64ee9cd557f5fd52cc15c2b7ab4\'})',
 '2025-12-02 05:26:34,156  DEBUG  request_id: None',
 '2025-12-02 05:26:34,187  DEBUG  probe return: <garak.probes.latentinjection.LatentJailbreak object at 0x7fc8d5a74a10> with 256 attempts',
 '2025-12-02 05:26:34,187  DEBUG  harness: run detector garak.detectors.unsafe_content.ToxicCommentModel',
 '2025-12-02 05:26:46,112  DEBUG  harness: run detector garak.detectors.base.TriggerListDetector',
 '2025-12-02 05:26:46,120  INFO  hit log in /opt/app-root/scan_files/scan.hitlog.jsonl',
 '2025-12-02 05:26:46,122  DEBUG  harness: probe list iteration completed',
 '2025-12-02 05:26:46,122  INFO  run complete, ending',
 '2025-12-02 05:26: