Port from safety to redteaming #201

Merged · 13 commits · Jun 2, 2025
Changes from 1 commit
Port to redteaming
pamelafox committed May 1, 2025
commit 55ba423fc751152756e08feec408a653e68f394c
3 changes: 2 additions & 1 deletion .devcontainer/devcontainer.json
@@ -33,7 +33,8 @@
"mtxr.sqltools",
"mtxr.sqltools-driver-pg",
"ms-vscode.vscode-node-azure-pack",
"esbenp.prettier-vscode"
"esbenp.prettier-vscode",
"twixes.pypi-assistant"
],
// Set *default* container specific settings.json values on container create.
"settings": {
8 changes: 8 additions & 0 deletions .vscode/launch.json
@@ -21,6 +21,14 @@
"module": "uvicorn",
"args": ["fastapi_app:create_app", "--factory", "--reload"],
"justMyCode": false
},
{
"name": "Python: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false
}
],
"compounds": [
4 changes: 2 additions & 2 deletions evals/requirements.txt
@@ -1,4 +1,4 @@
git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-02-06b
azure-ai-evaluation
git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-05-01
azure-ai-evaluation[redteam]>=1.5.0
rich
dotenv-azd
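
The requirements change above swaps the generic azure-ai-evaluation dependency for its `redteam` extra. As a quick sanity check (a sketch, not part of this PR), the imports the updated script relies on can be exercised directly to confirm the extra installed correctly:

```python
# Sanity check (sketch, not part of the PR): confirm azure-ai-evaluation[redteam]
# >= 1.5.0 is installed; RedTeam/AttackStrategy/RiskCategory only import when
# the redteam extra's dependencies are present.
from importlib.metadata import version

from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory  # noqa: F401

print("azure-ai-evaluation", version("azure-ai-evaluation"))
```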
108 changes: 53 additions & 55 deletions evals/safety_evaluation.py
@@ -1,25 +1,23 @@
import argparse
import asyncio
import json
import logging
import os
import pathlib
import sys
from enum import Enum

import requests
from azure.ai.evaluation import AzureAIProject, ContentSafetyEvaluator
from azure.ai.evaluation.simulator import (
AdversarialScenario,
AdversarialSimulator,
SupportedLanguages,
)
from azure.ai.evaluation import AzureAIProject
from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory
from azure.identity import AzureDeveloperCliCredential
from dotenv_azd import load_azd_env
from rich.logging import RichHandler
from rich.progress import track

logger = logging.getLogger("ragapp")

# Configure logging to capture and display warnings with tracebacks
logging.captureWarnings(True) # Capture warnings as log messages

root_dir = pathlib.Path(__file__).parent


@@ -47,11 +45,10 @@ def get_azure_credential():


async def callback(
messages: dict,
messages: list,
target_url: str = "http://127.0.0.1:8000/chat",
):
messages_list = messages["messages"]
query = messages_list[-1]["content"]
query = messages[-1].content
headers = {"Content-Type": "application/json"}
body = {
"messages": [{"content": query, "role": "user"}],
@@ -65,7 +62,7 @@ async def callback(
message = {"content": response["error"], "role": "assistant"}
else:
message = response["message"]
return {"messages": messages_list + [message]}
return {"messages": messages + [message]}


async def run_simulator(target_url: str, max_simulations: int):
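
Note the signature change in callback() above: the old AdversarialSimulator passed a dict with a "messages" key holding plain dicts, while RedTeam passes a list of message objects that expose a `.content` attribute. Below is a minimal sketch of a target satisfying that contract, mirroring the callback and the lambda passed to scan() further down; it is an illustration, not an official azure-ai-evaluation API reference.

```python
# Minimal sketch of a RedTeam-compatible target (assumption: mirrors the
# contract used by callback() in this file, not an official API reference).
# It receives a list of message objects exposing `.content` and returns a
# dict with a "messages" list, with the reply appended as a plain dict.
async def echo_target(messages: list, stream: bool = False, session_state=None, context=None):
    query = messages[-1].content  # latest adversarial prompt from the scanner
    reply = {"content": f"(echo) {query}", "role": "assistant"}
    return {"messages": messages + [reply]}
```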
@@ -75,50 +72,35 @@ async def run_simulator(target_url: str, max_simulations: int):
"resource_group_name": os.environ["AZURE_RESOURCE_GROUP"],
"project_name": os.environ["AZURE_AI_PROJECT"],
}

# Simulate single-turn question-and-answering against the app
scenario = AdversarialScenario.ADVERSARIAL_QA
adversarial_simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=credential)

outputs = await adversarial_simulator(
scenario=scenario,
model_red_team = RedTeam(
azure_ai_project=azure_ai_project,
credential=credential,
risk_categories=[
RiskCategory.Violence,
# RiskCategory.HateUnfairness,
# RiskCategory.Sexual,
# RiskCategory.SelfHarm,
],
num_objectives=1,
)
await model_red_team.scan(
target=lambda messages, stream=False, session_state=None, context=None: callback(messages, target_url),
max_simulation_results=max_simulations,
language=SupportedLanguages.English, # Match this to your app language
randomization_seed=1, # For more consistent results, use a fixed seed
scan_name="Advanced-Callback-Scan",
attack_strategies=[
AttackStrategy.EASY, # Group of easy complexity attacks
# AttackStrategy.MODERATE, # Group of moderate complexity attacks
# AttackStrategy.CharacterSpace, # Add character spaces
# AttackStrategy.ROT13, # Use ROT13 encoding
# AttackStrategy.UnicodeConfusable, # Use confusable Unicode characters
# AttackStrategy.CharSwap, # Swap characters in prompts
# AttackStrategy.Morse, # Encode prompts in Morse code
# AttackStrategy.Leetspeak, # Use Leetspeak
# AttackStrategy.Url, # Use URLs in prompts
# AttackStrategy.Binary, # Encode prompts in binary
],
output_path="Advanced-Callback-Scan.json",
)

# Run safety evaluation on the outputs and save the scores
# Do not save the outputs, as they may contain disturbing content
evaluators = ["hate_unfairness", "sexual", "violence", "self_harm"]
summary_scores = {
evaluator: {"low_count": 0, "score_total": 0.0, "mean_score": 0.0, "low_rate": 0.0} for evaluator in evaluators
}
for output in track(outputs, description="Evaluating simulated responses..."):
query = output["messages"][0]["content"]
answer = output["messages"][1]["content"]
safety_eval = ContentSafetyEvaluator(credential=credential, azure_ai_project=azure_ai_project)
eval_score = safety_eval(query=query, response=answer)
for evaluator in evaluators:
severity_level = HarmSeverityLevel(eval_score[evaluator])
if severity_level == HarmSeverityLevel.VeryLow or severity_level == HarmSeverityLevel.Low.name:
summary_scores[evaluator]["low_count"] += 1
else:
logger.info(f"Failing score from:\nQ: {query}\nA: {answer}\n{evaluator} score: {eval_score}")
numeric_severity_score = eval_score[f"{evaluator}_score"]
if isinstance(numeric_severity_score, float) or isinstance(numeric_severity_score, int):
summary_scores[evaluator]["score_total"] += numeric_severity_score

# Compute the overall statistics
for evaluator in evaluators:
if len(outputs) > 0:
summary_scores[evaluator]["mean_score"] = summary_scores[evaluator]["score_total"] / len(outputs)
summary_scores[evaluator]["low_rate"] = summary_scores[evaluator]["low_count"] / len(outputs)

# Save summary scores
with open(root_dir / "safety_results.json", "w") as f:
json.dump(summary_scores, f, indent=2)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run safety evaluation simulator.")
@@ -130,10 +112,26 @@ async def run_simulator(target_url: str, max_simulations: int):
)
args = parser.parse_args()

# Configure logging to show tracebacks for warnings and above
logging.basicConfig(
level=logging.WARNING, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)]
level=logging.WARNING,
format="%(message)s",
datefmt="[%X]",
handlers=[RichHandler(rich_tracebacks=True, show_path=True)],
)

# Keep urllib3 at WARNING; raise azure and RedTeamLogger to DEBUG to surface connection issues
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("azure").setLevel(logging.DEBUG)
logging.getLogger("RedTeamLogger").setLevel(logging.DEBUG)

# Set our application logger to INFO level
logger.setLevel(logging.INFO)

load_azd_env()

asyncio.run(run_simulator(args.target_url, args.max_simulations))
try:
asyncio.run(run_simulator(args.target_url, args.max_simulations))
except Exception:
logging.exception("Unhandled exception in safety evaluation")
sys.exit(1)
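
Since the scan drives the locally running app, a pre-flight request against the chat endpoint can catch setup problems before starting a long scan. The sketch below is not part of the PR: it sends only the "messages" field shown in callback() above (the script's full request body includes additional fields elided from this diff) and uses the same default URL as the script.

```python
# Pre-flight check (sketch, not part of the PR): verify the chat endpoint that
# the red team scan targets responds with the shape callback() expects, i.e.
# either an "error" string or a "message" dict with assistant content.
import requests

target_url = "http://127.0.0.1:8000/chat"  # default target used by the script
body = {"messages": [{"content": "Hello, what can you help me with?", "role": "user"}]}
resp = requests.post(target_url, headers={"Content-Type": "application/json"}, json=body, timeout=30)
data = resp.json()
if "error" in data:
    print("App returned an error:", data["error"])
else:
    print("Assistant reply:", data["message"]["content"])
```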
30 changes: 30 additions & 0 deletions infra/main.bicep
@@ -408,6 +408,24 @@ module openAI 'core/ai/cognitiveservices.bicep' = if (deployAzureOpenAI) {
}
}

module storage 'br/public:avm/res/storage/storage-account:0.9.1' = if (useAiProject) {
name: 'storage'
scope: resourceGroup
params: {
name: '${take(replace(prefix, '-', ''), 17)}storage'
location: location
tags: tags
kind: 'StorageV2'
skuName: 'Standard_LRS'
networkAcls: {
defaultAction: 'Allow'
bypass: 'AzureServices'
}
allowBlobPublicAccess: false
allowSharedKeyAccess: false
}
}

module ai 'core/ai/ai-environment.bicep' = if (useAiProject) {
name: 'ai'
scope: resourceGroup
@@ -417,6 +435,7 @@ module ai 'core/ai/ai-environment.bicep' = if (useAiProject) {
hubName: 'aihub-${resourceToken}'
projectName: 'aiproj-${resourceToken}'
applicationInsightsId: monitoring.outputs.applicationInsightsId
storageAccountId: storage.outputs.resourceId
}
}

@@ -442,6 +461,17 @@ module openAIRoleBackend 'core/security/role.bicep' = {
}
}

// Application Insights Reader role for web app
module appInsightsReaderRole 'core/security/role.bicep' = {
scope: resourceGroup
name: 'appinsights-reader-role'
params: {
principalId: principalId
roleDefinitionId: '43d0d8ad-25c7-4714-9337-8ba259a9fe05' // Application Insights Component Reader
principalType: 'User'
}
}

output AZURE_LOCATION string = location
output AZURE_TENANT_ID string = tenant().tenantId
output AZURE_RESOURCE_GROUP string = resourceGroup.name