57
57
enable_guardrails = False # Won't use guardrails if False
58
58
guardrailIdentifier = "xxxxxxxxxx"
59
59
guardrailVersion = "DRAFT"
60
+ guardrailTracing = "enabled" # [enabled, enabled_full, disabled]
60
61
61
62
# Specify the AWS region for the AI model
62
63
model_region_name = "us-west-2"
63
64
64
65
# Initial context step
65
66
enable_initial_model_context_step = False
66
67
initial_model_user_status_message = "Adding additional context :waiting:"
67
- initial_model_context_instructions = f"""
68
+ initial_model_system_prompt = f"""
68
69
Assistant should...
69
70
"""
70
71
87
88
rerank_model_id = "amazon.rerank-v1:0"
88
89
89
90
# Default system prompt, passed to the model as its instructions unless a caller supplies a different one
90
- model_guidance = f"""Assistant is a large language model named { bot_name } who is trained to support our employees in providing the best possible experience for their developers and operations team.
91
+ system_prompt = f"""Assistant is a large language model named { bot_name } who is trained to support our employees in providing the best possible experience for their developers and operations team.
91
92
Assistant must follow Slack's best practices for formatting messages.
92
93
Assistant must encode all hyperlinks with pipe syntax. For example, "https://www.google.com" should be formatted as "<https://www.google.com|Google>".
93
94
Assistant must limit messages to { slack_message_size_limit_words } words, including code blocks. For longer responses Assistant should provide the first part of the response, and then prompt User to ask for the next part of the response.
@@ -125,18 +126,19 @@ def update_slack_response(say, client, message_ts, channel_id, thread_ts, messag
125
126
# Return the message_ts
126
127
return message_ts
127
128
129
+
128
130
# Reranking knowledge base results
129
131
def rerank_text (flat_conversation , kb_responses , bedrock_client , kb_rerank_number_of_results ):
130
132
131
133
# Data looks like this:
132
134
# [
133
135
# {
134
136
# "text": "text",
135
- # "url ": "url",
137
+ # "source ": "url",
136
138
# },
137
139
# {
138
140
# "text": "text",
139
- # "url ": "url",
141
+ # "source ": "url",
140
142
# }
141
143
# ]
142
144
@@ -196,7 +198,7 @@ def rerank_text(flat_conversation, kb_responses, bedrock_client, kb_rerank_numbe
196
198
{
197
199
# Use the index value in rank_response to find the correct kb_response
198
200
"text" : kb_responses [rank_response ['index' ]]["text" ],
199
- "url " : kb_responses [rank_response ['index' ]]["url " ],
201
+ "source " : kb_responses [rank_response ['index' ]]["source " ],
200
202
"relevance_score" : rank_response ['relevance_score' ]
201
203
} for rank_response in rank_response_body ['results' ]
202
204
]
@@ -205,7 +207,7 @@ def rerank_text(flat_conversation, kb_responses, bedrock_client, kb_rerank_numbe
205
207
206
208
207
209
# Function to retrieve info from RAG with knowledge base
208
- def ask_bedrock_llm_with_knowledge_base (flat_conversation , knowledge_base_id , bedrock_client , kb_number_of_results , kb_rerank_number_of_results ) -> str :
210
+ def ask_bedrock_llm_with_knowledge_base (flat_conversation , knowledge_base_id , bedrock_client , kb_number_of_results , kb_rerank_number_of_results , say , client , channel_id , thread_ts ) -> str :
209
211
210
212
# Create a Bedrock agent runtime client
211
213
bedrock_agent_runtime_client = boto3 .client (
@@ -214,28 +216,41 @@ def ask_bedrock_llm_with_knowledge_base(flat_conversation, knowledge_base_id, be
214
216
)
215
217
216
218
# Uses model to retrieve related vectors from knowledge base
217
- kb_response = bedrock_agent_runtime_client .retrieve (
218
- retrievalQuery = {
219
- 'text' : flat_conversation
220
- },
221
- knowledgeBaseId = knowledge_base_id ,
222
- retrievalConfiguration = {
223
- 'vectorSearchConfiguration' : {
224
- 'numberOfResults' : kb_number_of_results ,
225
- }
226
- },
227
- )
228
-
219
+ try :
220
+ kb_response = bedrock_agent_runtime_client .retrieve (
221
+ retrievalQuery = {
222
+ 'text' : flat_conversation
223
+ },
224
+ knowledgeBaseId = knowledge_base_id ,
225
+ retrievalConfiguration = {
226
+ 'vectorSearchConfiguration' : {
227
+ 'numberOfResults' : kb_number_of_results ,
228
+ }
229
+ },
230
+ )
231
+ # Catch exception around Aurora waking up
232
+ except Exception as error :
233
+ # If the request fails, print the error
234
+ print (f"🚀 Error making request to knowledge base: { error } " )
235
+
236
+ # Raise error
237
+ raise error
238
+
239
+ if os .environ .get ("VERA_DEBUG" , "False" ) == "True" :
240
+ print ("🚀 Raw knowledge base responses:" , kb_response )
241
+
229
242
# Structure response
230
243
kb_responses = [
231
244
{
232
245
"text" : result ['content' ]['text' ],
233
- "url" : result ['location' ].get ('confluenceLocation' , {}).get ('url' , 's3' )
246
+ #"source": result['location'].get('confluenceLocation', {}).get('url', 's3')
247
+ # Confluence results expose location.confluenceLocation.url and S3 results expose location.s3Location.uri; fall back to 'unknown' when neither is present
248
+ "source" : result ['location' ].get ('confluenceLocation' , {}).get ('url' , result ['location' ].get ('s3Location' , {}).get ('uri' , 'unknown' )),
234
249
} for result in kb_response ['retrievalResults' ]
235
250
]
236
251
237
252
if os .environ .get ("VERA_DEBUG" , "False" ) == "True" :
238
- print ("🚀 Knowledge base responses:" , kb_responses )
253
+ print ("🚀 Structured knowledge base responses:" , kb_responses )
239
254
240
255
if enable_rerank :
241
256
# Rerank the knowledge base results
@@ -347,15 +362,23 @@ def streaming_response_on_slack(client, streaming_response, initial_response, ch
347
362
response = ""
348
363
token_counter = 0
349
364
buffer = ""
365
+ full_event_payload = []
366
+
367
+ guardrail_type = None
368
+ guardrail_confidence = None
369
+ guardrail_filter_strength = None
370
+ guardrail_action = None
350
371
351
- # Iterate over eventstream chunks
352
372
for chunk in streaming_response ['stream' ]:
373
+ full_event_payload .append (chunk ) # accumulate full payload
374
+
375
+ # Handle streamed text for Slack updates
353
376
if "contentBlockDelta" in chunk :
354
377
text = chunk ["contentBlockDelta" ]["delta" ]["text" ]
355
378
response += text
356
379
buffer += text
357
380
token_counter += 1
358
-
381
+
359
382
if token_counter >= slack_buffer_token_size :
360
383
client .chat_update (
361
384
text = response ,
@@ -365,18 +388,52 @@ def streaming_response_on_slack(client, streaming_response, initial_response, ch
365
388
token_counter = 0
366
389
buffer = ""
367
390
368
- # If buffer contains anything after iterating over all chunks, add it also
391
+ # Final Slack update
369
392
if buffer :
370
- print (f"Final update to Slack with: { response } " )
393
+ # Check for blocked message
394
+ if "input has been blocked by Veradigm's content filter" in response :
395
+ if os .environ .get ("VERA_DEBUG" , "False" ) == "True" :
396
+ print ("🚀 Full event payload:" , full_event_payload )
397
+
398
+ for event in full_event_payload :
399
+ if "metadata" in event and "trace" in event ["metadata" ]:
400
+ trace = event ["metadata" ]["trace" ]
401
+ guardrail = trace .get ("guardrail" , {})
402
+ input_assessment = guardrail .get ("inputAssessment" , {})
403
+
404
+ if guardrailIdentifier in input_assessment :
405
+ assessment = input_assessment [guardrailIdentifier ]
406
+ filters = assessment .get ("contentPolicy" , {}).get ("filters" , [])
407
+ if filters :
408
+ first_filter = filters [0 ]
409
+ guardrail_type = first_filter .get ("type" )
410
+ guardrail_confidence = first_filter .get ("confidence" )
411
+ guardrail_filter_strength = first_filter .get ("filterStrength" )
412
+ guardrail_action = first_filter .get ("action" )
413
+ break
414
+
415
+ # Enrich Slack message with guardrail info
416
+ if guardrail_action == "BLOCKED" :
417
+ blocked_text = response
418
+ response = (
419
+ f"🛑 *Our security guardrail blocked this conversation*\n "
420
+ f"> { blocked_text } \n \n "
421
+ f"• *Guardrail blocked type:* { guardrail_type } \n "
422
+ f"• *Strength our guardrail config is set to:* { guardrail_filter_strength } \n "
423
+ f"• *Confidence this conversation breaks the rules:* { guardrail_confidence } \n \n "
424
+ f"*You can try rephrasing your question, or open a ticket with DevOps to investigate*"
425
+ )
426
+
427
+ print (f"🚀 Final update to Slack with: { response } " )
371
428
client .chat_update (
372
429
text = response ,
373
430
channel = channel_id ,
374
431
ts = initial_response
375
432
)
376
-
433
+
377
434
378
435
# Handle ai request input and response
379
- def ai_request (bedrock_client , messages , say , thread_ts , client , message_ts , channel_id , request_streaming_response = True ):
436
+ def ai_request (bedrock_client , messages , say , thread_ts , client , message_ts , channel_id , request_streaming_response = True , system_prompt = system_prompt ):
380
437
381
438
# Format model system prompt for the request
382
439
system = [
@@ -391,13 +448,9 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, cha
391
448
}
392
449
393
450
# Additional inference parameters to use.
394
- # If model_id contains "sonnet", use top_k
395
- if "sonnet" in model_id :
396
- additional_model_fields = {
397
- "top_k" : top_k
398
- }
399
- else :
400
- additional_model_fields = {}
451
+ additional_model_fields = {
452
+ "top_k" : top_k
453
+ }
401
454
402
455
# Build converse body. If guardrails is enabled, add those keys to the body
403
456
if enable_guardrails :
@@ -406,6 +459,7 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, cha
406
459
"guardrailConfig" : {
407
460
"guardrailIdentifier" : guardrailIdentifier ,
408
461
"guardrailVersion" : guardrailVersion ,
462
+ "trace" : guardrailTracing ,
409
463
},
410
464
"messages" : messages ,
411
465
"system" : system ,
@@ -458,6 +512,7 @@ def ai_request(bedrock_client, messages, say, thread_ts, client, message_ts, cha
458
512
f"😔 Error with request: " + str (error ),
459
513
)
460
514
515
+
461
516
# Check for duplicate events
462
517
def check_for_duplicate_event (headers , payload ):
463
518
@@ -825,28 +880,16 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
825
880
initial_model_user_status_message ,
826
881
)
827
882
828
- # Append to conversation
829
- conversation .append (
830
- {
831
- "role" : "user" ,
832
- "content" : [
833
- {
834
- "text" : initial_model_context_instructions ,
835
- }
836
- ],
837
- }
838
- )
839
-
840
883
# Ask the AI for a response
841
- ai_response = ai_request (bedrock_client , conversation , say , thread_ts , client , message_ts , channel_id , False )
884
+ ai_response = ai_request (bedrock_client , conversation , say , thread_ts , client , message_ts , channel_id , False , initial_model_system_prompt )
842
885
843
886
# Append to conversation
844
887
conversation .append (
845
888
{
846
889
"role" : "assistant" ,
847
890
"content" : [
848
891
{
849
- "text" : ai_response ,
892
+ "text" : f"Initialization information from the model: { ai_response } " ,
850
893
}
851
894
],
852
895
}
@@ -890,28 +933,36 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
890
933
891
934
# Get context data from the knowledge base
892
935
try :
893
- knowledge_base_response = ask_bedrock_llm_with_knowledge_base (flat_conversation , kb_id , bedrock_client , kb_number_of_results , kb_rerank_number_of_results )
936
+ knowledge_base_response = ask_bedrock_llm_with_knowledge_base (flat_conversation , kb_id , bedrock_client , kb_number_of_results , kb_rerank_number_of_results , say , client , channel_id , thread_ts )
894
937
except Exception as error :
895
938
# If the request fails, print the error
896
939
print (f"🚀 Error making request to knowledge base { kb_name } : { error } " )
897
940
898
941
# Split the error message on its first two colons and keep the last piece (everything after the second colon, when present)
899
942
error = str (error ).split (":" , 2 )[- 1 ].strip ()
900
-
901
- # Return error as response
902
- message_ts = update_slack_response (
903
- say , client , message_ts , channel_id , thread_ts ,
904
- f"😔 Error fetching from knowledge base: " + str (error ),
905
- )
906
- return
943
+
944
+ # If the error contains "resuming after being auto-paused", ask user to try again later
945
+ if "resuming after being auto-paused" in error :
946
+ message_ts = update_slack_response (
947
+ say , client , message_ts , channel_id , thread_ts ,
948
+ f"😴 This is the first request to { bot_name } in a while, and it needs to wake up. \n \n :pray: Please tag this bot again in a few minutes."
949
+ )
950
+ else :
951
+ # Return error as response
952
+ message_ts = update_slack_response (
953
+ say , client , message_ts , channel_id , thread_ts ,
954
+ f"😔 Error fetching from knowledge base: " + str (error ),
955
+ )
956
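+ # Raise error. NOTE(review): `error` was rebound to a str by the split above, so `raise error` raises TypeError instead of the original exception; a bare `raise` would re-raise the original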
957
+ raise error
907
958
908
959
if os .environ .get ("VERA_DEBUG" , "False" ) == "True" :
909
960
print (f"🚀 Knowledge base response: { knowledge_base_response } " )
910
961
911
962
# Iterate through responses
912
963
for result in knowledge_base_response :
913
964
citation_result = result ['text' ]
914
- citation_url = result ['url ' ]
965
+ citation_source = result ['source ' ]
915
966
916
967
# If reranking enabled, use that information
917
968
if enable_rerank :
@@ -922,10 +973,10 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
922
973
# Append to conversation
923
974
conversation .append (
924
975
{
925
- "role" : "user " ,
976
+ "role" : "assistant " ,
926
977
"content" : [
927
978
{
928
- "text" : f"Knowledge base citation to supplement your answer: { citation_result } from URL { citation_url } . Reranker scored this result relevancy at { relevance_score } " ,
979
+ "text" : f"Knowledge base citation to supplement your answer: { citation_result } from source { citation_source } . Reranker scored this result relevancy at { relevance_score } " ,
929
980
}
930
981
],
931
982
}
@@ -937,10 +988,10 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
937
988
# Append to conversation
938
989
conversation .append (
939
990
{
940
- "role" : "user " ,
991
+ "role" : "assistant " ,
941
992
"content" : [
942
993
{
943
- "text" : f"Knowledge base citation to supplement your answer: { citation_result } from URL { citation_url } " ,
994
+ "text" : f"Knowledge base citation to supplement your answer: { citation_result } from source { citation_source } " ,
944
995
}
945
996
],
946
997
}
@@ -955,11 +1006,9 @@ def handle_message_event(client, body, say, bedrock_client, app, token, register
955
1006
# Call the AI model with the conversation
956
1007
if os .environ .get ("VERA_DEBUG" , "False" ) == "True" :
957
1008
print ("🚀 State of conversation before AI request:" , conversation )
958
- #streaming_response = ai_request(bedrock_client, conversation, say, thread_ts, client, message_ts, channel_id, False)
959
- ai_request (bedrock_client , conversation , say , thread_ts , client , message_ts , channel_id , True )
960
1009
961
- # Stream the response back to slack
962
- #streaming_response_on_slack(client, streaming_response, message_ts, channel_id, thread_ts )
1010
+ # Make the AI request
1011
+ ai_request ( bedrock_client , conversation , say , thread_ts , client , message_ts , channel_id , True )
963
1012
964
1013
# Print success
965
1014
print ("🚀 Successfully responded to message, exiting" )
0 commit comments