In [1]:
import time
from rich.pretty import pprint
import requests
import json

In [2]:
# Address of the Llama Stack server this notebook talks to.
BASE_URL = "http://localhost:8321"


def create_http_client():
    """Build a ``LlamaStackClient`` pointed at ``BASE_URL``.

    Returns:
        A new ``LlamaStackClient`` instance constructed with ``base_url=BASE_URL``.
    """
    # Imported inside the function, so the package is only loaded when a client is built.
    from llama_stack_client import LlamaStackClient

    http_client = LlamaStackClient(base_url=BASE_URL)
    return http_client


# Shared client used by every cell below.
client = create_http_client()

In [3]:
# Ask the server which models it knows about and show the answer.
available_models = client.models.list()
pprint(f"Available Models: {available_models}")

### scan raw LLM

In [4]:
# Register a garak benchmark that runs a single prompt-injection probe.
benchmark_id = "trustyai_garak::prompt_injection"

garak_metadata = {
    "probes": ["promptinject.HijackHateHumans"],
    "timeout": 60*10,  # evaluates to 600; presumably seconds — confirm with the provider
}

# Kept as the cell's last expression so the notebook still displays its return value.
client.benchmarks.register(
    benchmark_id=benchmark_id,
    dataset_id=benchmark_id,
    scoring_functions=["string"],
    provider_benchmark_id="prompt_injection",
    provider_id="trustyai_garak",
    metadata=garak_metadata,
)

In [5]:
# Candidate under evaluation: the "qwen2" model with empty sampling parameters.
eval_candidate = {
    "type": "model",
    "model": "qwen2",
    "sampling_params": {},
}

# Start the eval for the benchmark registered above; `job` is polled in the next cell.
job = client.eval.run_eval(
    benchmark_id=benchmark_id,
    benchmark_config={"eval_candidate": eval_candidate},
)

pprint(f"Starting job '{job}'")

In [6]:
def get_job_status(job_id, benchmark_id):
    """Look up the current state of an eval job through the shared client.

    Args:
        job_id: Identifier of the eval job to look up.
        benchmark_id: Identifier of the benchmark the job belongs to.

    Returns:
        Whatever ``client.eval.jobs.status`` returns; the polling code below
        reads its ``job_id`` and ``status`` attributes.
    """
    return client.eval.jobs.status(job_id=job_id, benchmark_id=benchmark_id)


# Statuses after which polling stops.
FINISHED_STATUSES = ("failed", "completed", "cancelled")

# Poll every 20 seconds, printing each status, until the job is finished.
job = get_job_status(job_id=job.job_id, benchmark_id=benchmark_id)
print(job)
while job.status not in FINISHED_STATUSES:
    time.sleep(20)
    job = get_job_status(job_id=job.job_id, benchmark_id=benchmark_id)
    print(job)

pprint(f"Job ended with status: {job.status}")

Job(job_id='garak-job-5825cdd0-f038-47a2-bc99-4fab923d6baf', status='in_progress', metadata={'created_at': '2025-07-22T12:51:50.727279', 'started_at': '2025-07-22T12:51:50.729351', 'process_id': '62176'})
Job(job_id='garak-job-5825cdd0-f038-47a2-bc99-4fab923d6baf', status='in_progress', metadata={'created_at': '2025-07-22T12:51:50.727279', 'started_at': '2025-07-22T12:51:50.729351', 'process_id': '62176'})
Job(job_id='garak-job-5825cdd0-f038-47a2-bc99-4fab923d6baf', status='in_progress', metadata={'created_at': '2025-07-22T12:51:50.727279', 'started_at': '2025-07-22T12:51:50.729351', 'process_id': '62176'})
Job(job_id='garak-job-5825cdd0-f038-47a2-bc99-4fab923d6baf', status='in_progress', metadata={'created_at': '2025-07-22T12:51:50.727279', 'started_at': '2025-07-22T12:51:50.729351', 'process_id': '62176'})
Job(job_id='garak-job-5825cdd0-f038-47a2-bc99-4fab923d6baf', status='in_progress', metadata={'created_at': '2025-07-22T12:51:50.727279', 'started_at': '2025-07-22T12:51:50.729351',

About `scores`: each key is a garak probe, and each value is a `ScoringResult` object containing the scores from that probe's garak detectors.

In [7]:
# Fetch the job's evaluation result and keep only its scores.
eval_result = client.eval.jobs.retrieve(job_id=job.job_id, benchmark_id=benchmark_id)
scores = eval_result.scores

In [8]:
# Show the full scoring results retrieved for the raw-LLM scan.
pprint(scores)

In [9]:
# Keep only the aggregated results of each entry in `scores`, under the same key.
aggregated_scores = {}
for probe_name, scoring_result in scores.items():
    aggregated_scores[probe_name] = scoring_result.aggregated_results

pprint(aggregated_scores)

### scan LLM with appropriate shield

In [13]:
# Register the same probe again, this time with a shield listed in the metadata.
benchmark_id = "trustyai_garak::prompt_injection_with_input_shield"

shielded_metadata = dict(
    probes=["promptinject.HijackHateHumans"],
    timeout=60*10,  # evaluates to 600; presumably seconds — confirm with the provider
    shield_ids=["Prompt-Guard-86M"],
)

# Kept as the cell's last expression so the notebook still displays its return value.
client.benchmarks.register(
    benchmark_id=benchmark_id,
    dataset_id=benchmark_id,
    scoring_functions=["string"],
    provider_benchmark_id="prompt_injection_with_input_shield",
    provider_id="trustyai_garak",
    metadata=shielded_metadata,
)

In [14]:
# Candidate for the shielded scan: the "qwen2" model, with an explicit provider id.
shielded_candidate = {
    "type": "model",
    "model": "qwen2",
    "provider_id": "trustyai_garak",
    "sampling_params": {},
}

# Start the eval for the shielded benchmark; `job` is polled in the next cell.
job = client.eval.run_eval(
    benchmark_id=benchmark_id,
    benchmark_config={"eval_candidate": shielded_candidate},
)

pprint(f"Starting job '{job}'")

In [15]:
# `get_job_status` is defined once, in the polling cell of the raw-LLM scan
# earlier in this notebook; it is reused here instead of being redefined.

# Poll the shielded-scan job every 20 seconds, printing each status.
while True:
    job = get_job_status(job_id=job.job_id, benchmark_id=benchmark_id)
    print(job)

    # Stop as soon as the job reports one of its end states.
    if job.status in ['failed', 'completed', 'cancelled']:
        pprint(f"Job ended with status: {job.status}")
        break

    time.sleep(20)

Job(job_id='garak-job-46d63094-b3b9-4fb0-9be8-db04feee84c7', status='in_progress', metadata={'created_at': '2025-07-22T12:56:13.610761', 'started_at': '2025-07-22T12:56:13.611512', 'process_id': '63143'})
Job(job_id='garak-job-46d63094-b3b9-4fb0-9be8-db04feee84c7', status='in_progress', metadata={'created_at': '2025-07-22T12:56:13.610761', 'started_at': '2025-07-22T12:56:13.611512', 'process_id': '63143'})
Job(job_id='garak-job-46d63094-b3b9-4fb0-9be8-db04feee84c7', status='completed', metadata={'created_at': '2025-07-22T12:56:13.610761', 'started_at': '2025-07-22T12:56:13.611512', 'process_id': '63143', 'scan_report_file_id': 'file-3857c614ec014653bf9b574730c8eac6', 'scan_log_file_id': 'file-21323a95058a482f9b25db6021c7d888', 'scan_hitlog_file_id': 'file-a599e6a3f2e543a88bd6172dd9f66a3f', 'scan_report_html_file_id': 'file-b409f027e5b14122be85500eb8e53e94', 'completed_at': '2025-07-22T12:56:35.498162'})


Note that the prompt_injection scores should drop drastically because of our input shield.

In [16]:
# Fetch the shielded job's evaluation result and keep only its scores.
shielded_result = client.eval.jobs.retrieve(job_id=job.job_id, benchmark_id=benchmark_id)
scores = shielded_result.scores

In [19]:
# Map each key in `scores` to the aggregated results of its value.
aggregated_scores = dict(
    (probe, result.aggregated_results) for probe, result in scores.items()
)
pprint(aggregated_scores)

In [20]:
# Show the full scoring results retrieved for the shielded scan.
pprint(scores)