
Commit d55ff87

Merge pull request #9 from KyMidd/feature/guardrail-tracing

Add guardrail tracing

2 parents: 32eb1c5 + cf0490d

File tree: 2 files changed (+120, −71 lines)


python/devopsbot.py

Lines changed: 113 additions & 64 deletions
@@ -57,14 +57,15 @@
 enable_guardrails = False # Won't use guardrails if False
 guardrailIdentifier = "xxxxxxxxxx"
 guardrailVersion = "DRAFT"
+guardrailTracing = "enabled" # [enabled, enabled_full, disabled]
 
 # Specify the AWS region for the AI model
 model_region_name = "us-west-2"
 
 # Initial context step
 enable_initial_model_context_step = False
 initial_model_user_status_message = "Adding additional context :waiting:"
-initial_model_context_instructions = f"""
+initial_model_system_prompt = f"""
 Assistant should...
 """
 
@@ -87,7 +88,7 @@
 rerank_model_id = "amazon.rerank-v1:0"
 
 # Model guidance, shimmed into each conversation as instructions for the model
-model_guidance = f"""Assistant is a large language model named {bot_name} who is trained to support our employees in providing the best possible experience for their developers and operations team.
+system_prompt = f"""Assistant is a large language model named {bot_name} who is trained to support our employees in providing the best possible experience for their developers and operations team.
 Assistant must follow Slack's best practices for formatting messages.
 Assistant must encode all hyperlinks with pipe syntax. For example, "https://www.google.com" should be formatted as "<https://www.google.com|Google>".
 Assistant must limit messages to {slack_message_size_limit_words} words, including code blocks. For longer responses Assistant should provide the first part of the response, and then prompt User to ask for the next part of the response.
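
The rename from model_guidance to system_prompt matches how the Converse API consumes the prompt: as a list of system content blocks, which ai_request formats later in this diff. A minimal sketch of that shape:

# Sketch: the Converse API takes the system prompt as a list of content blocks.
system = [
    {
        "text": system_prompt,
    }
]
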
@@ -125,18 +126,19 @@ def update_slack_response(say, client, message_ts, channel_id, thread_ts, message):
     # Return the message_ts
     return message_ts
 
+
 # Reranking knowledge base results
 def rerank_text(flat_conversation, kb_responses, bedrock_client, kb_rerank_number_of_results):
 
     # Data looks like this:
     # [
     #   {
     #     "text": "text",
-    #     "url": "url",
+    #     "source": "url",
     #   },
     #   {
     #     "text": "text",
-    #     "url": "url",
+    #     "source": "url",
     #   }
     # ]
 
@@ -196,7 +198,7 @@ def rerank_text(flat_conversation, kb_responses, bedrock_client, kb_rerank_number_of_results):
         {
             # Use the index value in rank_response to find the correct kb_response
             "text": kb_responses[rank_response['index']]["text"],
-            "url": kb_responses[rank_response['index']]["url"],
+            "source": kb_responses[rank_response['index']]["source"],
             "relevance_score": rank_response['relevance_score']
         } for rank_response in rank_response_body['results']
     ]
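
For context on the comprehension above: the rerank model's response references the input documents by index, so each ranked entry is mapped back into kb_responses. An illustrative response body, with made-up values and the shape inferred from this parsing code:

# Sketch: illustrative rerank output; each result points back into kb_responses by index.
rank_response_body = {
    "results": [
        {"index": 2, "relevance_score": 0.91},  # most relevant kb_response
        {"index": 0, "relevance_score": 0.47},
    ]
}
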
@@ -205,7 +207,7 @@ def rerank_text(flat_conversation, kb_responses, bedrock_client, kb_rerank_number_of_results):
 
 
 # Function to retrieve info from RAG with knowledge base
-def ask_bedrock_llm_with_knowledge_base(flat_conversation, knowledge_base_id, bedrock_client, kb_number_of_results, kb_rerank_number_of_results) -> str:
+def ask_bedrock_llm_with_knowledge_base(flat_conversation, knowledge_base_id, bedrock_client, kb_number_of_results, kb_rerank_number_of_results, say, client, channel_id, thread_ts) -> str:
 
     # Create a Bedrock agent runtime client
     bedrock_agent_runtime_client = boto3.client(
@@ -214,28 +216,41 @@ def ask_bedrock_llm_with_knowledge_base(flat_conversation, knowledge_base_id, bedrock_client, kb_number_of_results, kb_rerank_number_of_results) -> str:
     )
 
     # Uses model to retrieve related vectors from knowledge base
-    kb_response = bedrock_agent_runtime_client.retrieve(
-        retrievalQuery={
-            'text': flat_conversation
-        },
-        knowledgeBaseId=knowledge_base_id,
-        retrievalConfiguration={
-            'vectorSearchConfiguration': {
-                'numberOfResults': kb_number_of_results,
-            }
-        },
-    )
-
+    try:
+        kb_response = bedrock_agent_runtime_client.retrieve(
+            retrievalQuery={
+                'text': flat_conversation
+            },
+            knowledgeBaseId=knowledge_base_id,
+            retrievalConfiguration={
+                'vectorSearchConfiguration': {
+                    'numberOfResults': kb_number_of_results,
+                }
+            },
+        )
+    # Catch exception around Aurora waking up
+    except Exception as error:
+        # If the request fails, print the error
+        print(f"🚀 Error making request to knowledge base: {error}")
+
+        # Raise error
+        raise error
+
+    if os.environ.get("VERA_DEBUG", "False") == "True":
+        print("🚀 Raw knowledge base responses:", kb_response)
+
     # Structure response
     kb_responses = [
         {
             "text": result['content']['text'],
-            "url": result['location'].get('confluenceLocation', {}).get('url', 's3')
+            #"source": result['location'].get('confluenceLocation', {}).get('url', 's3')
+            # If confluence, it'll be location.confluenceLocation.url, if S3 it'll be location.s3Location.uri
+            "source": result['location'].get('confluenceLocation', {}).get('url', result['location'].get('s3Location', {}).get('uri', 'unknown')),
         } for result in kb_response['retrievalResults']
     ]
 
     if os.environ.get("VERA_DEBUG", "False") == "True":
-        print("🚀 Knowledge base responses:", kb_responses)
+        print("🚀 Structured knowledge base responses:", kb_responses)
 
     if enable_rerank:
         # Rerank the knowledge base results
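
The retrieve() response nests each document's location under a per-connector key, which is what the new source fallback handles. A small runnable sketch with illustrative entries (text and URLs are made up):

# Sketch: Confluence results carry location.confluenceLocation.url, S3 results
# carry location.s3Location.uri; anything else falls through to "unknown".
confluence_result = {
    "content": {"text": "How to rotate IAM keys..."},
    "location": {"confluenceLocation": {"url": "https://example.atlassian.net/wiki/x"}},
}
s3_result = {
    "content": {"text": "Runbook: restart the ingest job..."},
    "location": {"s3Location": {"uri": "s3://kb-docs/runbooks/ingest.md"}},
}
for result in (confluence_result, s3_result):
    source = result['location'].get('confluenceLocation', {}).get(
        'url', result['location'].get('s3Location', {}).get('uri', 'unknown'))
    print(source)  # prints the Confluence URL, then the S3 URI
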
@@ -347,15 +362,23 @@ def streaming_response_on_slack(client, streaming_response, initial_response, channel_id, thread_ts):
     response = ""
     token_counter = 0
     buffer = ""
+    full_event_payload = []
+
+    guardrail_type = None
+    guardrail_confidence = None
+    guardrail_filter_strength = None
+    guardrail_action = None
 
-    # Iterate over eventstream chunks
     for chunk in streaming_response['stream']:
+        full_event_payload.append(chunk)  # accumulate full payload
+
+        # Handle streamed text for Slack updates
         if "contentBlockDelta" in chunk:
             text = chunk["contentBlockDelta"]["delta"]["text"]
             response += text
             buffer += text
             token_counter += 1
-
+
             if token_counter >= slack_buffer_token_size:
                 client.chat_update(
                     text=response,
@@ -365,18 +388,52 @@ def streaming_response_on_slack(client, streaming_response, initial_response, channel_id, thread_ts):
                 token_counter = 0
                 buffer = ""
 
-    # If buffer contains anything after iterating over all chunks, add it also
+    # Final Slack update
     if buffer:
-        print(f"Final update to Slack with: {response}")
+        # Check for blocked message
+        if "input has been blocked by Veradigm's content filter" in response:
+            if os.environ.get("VERA_DEBUG", "False") == "True":
+                print("🚀 Full event payload:", full_event_payload)
+
+            for event in full_event_payload:
+                if "metadata" in event and "trace" in event["metadata"]:
+                    trace = event["metadata"]["trace"]
+                    guardrail = trace.get("guardrail", {})
+                    input_assessment = guardrail.get("inputAssessment", {})
+
+                    if guardrailIdentifier in input_assessment:
+                        assessment = input_assessment[guardrailIdentifier]
+                        filters = assessment.get("contentPolicy", {}).get("filters", [])
+                        if filters:
+                            first_filter = filters[0]
+                            guardrail_type = first_filter.get("type")
+                            guardrail_confidence = first_filter.get("confidence")
+                            guardrail_filter_strength = first_filter.get("filterStrength")
+                            guardrail_action = first_filter.get("action")
+                            break
+
+            # Enrich Slack message with guardrail info
+            if guardrail_action == "BLOCKED":
+                blocked_text = response
+                response = (
+                    f"🛑 *Our security guardrail blocked this conversation*\n"
+                    f"> {blocked_text}\n\n"
+                    f"• *Guardrail blocked type:* {guardrail_type}\n"
+                    f"• *Strength our guardrail config is set to:* {guardrail_filter_strength}\n"
+                    f"• *Confidence this conversation breaks the rules:* {guardrail_confidence}\n\n"
+                    f"*You can try rephrasing your question, or open a ticket with DevOps to investigate*"
+                )
+
+        print(f"🚀 Final update to Slack with: {response}")
         client.chat_update(
             text=response,
             channel=channel_id,
             ts=initial_response
         )
-
+
 
 # Handle ai request input and response
-def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, channel_id, request_streaming_response=True):
+def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, channel_id, request_streaming_response=True, system_prompt=system_prompt):
 
     # Format model system prompt for the request
     system = [
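
The guardrail parsing above expects the trace to arrive in the stream's metadata event. A sketch of the event shape it walks, with illustrative values (the real payload depends on which guardrail policy fired):

# Sketch: an illustrative metadata event, shaped the way the parser expects.
example_event = {
    "metadata": {
        "trace": {
            "guardrail": {
                "inputAssessment": {
                    "xxxxxxxxxx": {  # keyed by guardrailIdentifier
                        "contentPolicy": {
                            "filters": [{
                                "type": "VIOLENCE",          # illustrative values
                                "confidence": "HIGH",
                                "filterStrength": "MEDIUM",
                                "action": "BLOCKED",
                            }]
                        }
                    }
                }
            }
        }
    }
}
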
@@ -391,13 +448,9 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, channel_id, request_streaming_response=True):
     }
 
     # Additional inference parameters to use.
-    # If model_id contains "sonnet", use top_k
-    if "sonnet" in model_id:
-        additional_model_fields = {
-            "top_k": top_k
-        }
-    else:
-        additional_model_fields = {}
+    additional_model_fields = {
+        "top_k": top_k
+    }
 
     # Build converse body. If guardrails is enabled, add those keys to the body
     if enable_guardrails:
@@ -406,6 +459,7 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, channel_id, request_streaming_response=True):
         "guardrailConfig": {
             "guardrailIdentifier": guardrailIdentifier,
             "guardrailVersion": guardrailVersion,
+            "trace": guardrailTracing,
         },
         "messages": messages,
        "system": system,
@@ -458,6 +512,7 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, channel_id, request_streaming_response=True):
             f"😔 Error with request: " + str(error),
         )
 
+
 # Check for duplicate events
 def check_for_duplicate_event(headers, payload):
 
@@ -825,28 +880,16 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
             initial_model_user_status_message,
         )
 
-        # Append to conversation
-        conversation.append(
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "text": initial_model_context_instructions,
-                    }
-                ],
-            }
-        )
-
         # Ask the AI for a response
-        ai_response = ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, False)
+        ai_response = ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, False, initial_model_system_prompt)
 
         # Append to conversation
        conversation.append(
            {
                "role": "assistant",
                "content": [
                    {
-                        "text": ai_response,
+                        "text": f"Initialization information from the model: {ai_response}",
                    }
                ],
            }
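
The new system_prompt=system_prompt default lets the initial-context step pass initial_model_system_prompt while every other call site keeps the main prompt. A small standalone sketch of the pattern (names here are illustrative):

# Sketch: a module-level default captured as a keyword default, overridable per call.
DEFAULT_PROMPT = "Assistant is a helpful DevOps bot."

def ai_request_demo(messages, system_prompt=DEFAULT_PROMPT):
    return f"[{system_prompt}] {messages}"

print(ai_request_demo("hello"))                                # main prompt
print(ai_request_demo("hello", system_prompt="Add context."))  # override
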
@@ -890,28 +933,36 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
 
         # Get context data from the knowledge base
         try:
-            knowledge_base_response = ask_bedrock_llm_with_knowledge_base(flat_conversation, kb_id, bedrock_client, kb_number_of_results, kb_rerank_number_of_results)
+            knowledge_base_response = ask_bedrock_llm_with_knowledge_base(flat_conversation, kb_id, bedrock_client, kb_number_of_results, kb_rerank_number_of_results, say, client, channel_id, thread_ts)
         except Exception as error:
             # If the request fails, print the error
             print(f"🚀 Error making request to knowledge base {kb_name}: {error}")
 
             # Split the error message at a colon, grab everything after the second colon
             error = str(error).split(":", 2)[-1].strip()
-
-            # Return error as response
-            message_ts = update_slack_response(
-                say, client, message_ts, channel_id, thread_ts,
-                f"😔 Error fetching from knowledge base: " + str(error),
-            )
-            return
+
+            # If the error contains "resuming after being auto-paused", ask user to try again later
+            if "resuming after being auto-paused" in error:
+                message_ts = update_slack_response(
+                    say, client, message_ts, channel_id, thread_ts,
+                    f"😴 This is the first request to {bot_name} in a while, and it needs to wake up. \n\n:pray: Please tag this bot again in a few minutes."
+                )
+            else:
+                # Return error as response
+                message_ts = update_slack_response(
+                    say, client, message_ts, channel_id, thread_ts,
+                    f"😔 Error fetching from knowledge base: " + str(error),
+                )
+            # Raise error
+            raise error
 
         if os.environ.get("VERA_DEBUG", "False") == "True":
             print(f"🚀 Knowledge base response: {knowledge_base_response}")
 
         # Iterate through responses
         for result in knowledge_base_response:
             citation_result = result['text']
-            citation_url = result['url']
+            citation_source = result['source']
 
             # If reranking enabled, use that information
             if enable_rerank:
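
The wake-up branch relies on substring-matching the stringified exception, since an Aurora-backed knowledge base that has auto-paused rejects the first request while resuming. A tiny sketch with an illustrative error string (the real wording comes from the service):

# Sketch: route a friendly retry message while the vector store wakes up.
error_text = "... resuming after being auto-paused ..."  # illustrative
if "resuming after being auto-paused" in error_text:
    print("😴 Bot needs to wake up, please try again in a few minutes")
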
@@ -922,10 +973,10 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
                 # Append to conversation
                 conversation.append(
                     {
-                        "role": "user",
+                        "role": "assistant",
                         "content": [
                             {
-                                "text": f"Knowledge base citation to supplement your answer: {citation_result} from URL {citation_url}. Reranker scored this result relevancy at {relevance_score}",
+                                "text": f"Knowledge base citation to supplement your answer: {citation_result} from source {citation_source}. Reranker scored this result relevancy at {relevance_score}",
                            }
                        ],
                    }
@@ -937,10 +988,10 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
                 # Append to conversation
                 conversation.append(
                     {
-                        "role": "user",
+                        "role": "assistant",
                         "content": [
                             {
-                                "text": f"Knowledge base citation to supplement your answer: {citation_result} from URL {citation_url}",
+                                "text": f"Knowledge base citation to supplement your answer: {citation_result} from source {citation_source}",
                            }
                        ],
                    }
@@ -955,11 +1006,9 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
     # Call the AI model with the conversation
     if os.environ.get("VERA_DEBUG", "False") == "True":
         print("🚀 State of conversation before AI request:", conversation)
-    #streaming_response = ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, False)
-    ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, True)
 
-    # Stream the response back to slack
-    #streaming_response_on_slack(client, streaming_response, message_ts, channel_id, thread_ts)
+    # Make the AI request
+    ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, True)
 
     # Print success
     print("🚀 Successfully responded to message, exiting")
