In [None]:
!pip install boto3 -U

## Initialize the Bedrock Client

- We pass a custom URL using the **`endpoint_url`** parameter.
- We add a custom header using boto3 events; see the [documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/events.html) for more details.

In [18]:
import boto3
import os

# Initialize a Bedrock runtime client that sends its requests to a custom endpoint.
# The endpoint can be overridden with the BEDROCK_GATEWAY_URL environment variable;
# when it is not set, the address originally used in this notebook is the fallback.
bedrock_client = boto3.client(
    service_name='bedrock-runtime',
    # aws_access_key_id="hello",
    # aws_secret_access_key="yes",
    endpoint_url=os.environ.get('BEDROCK_GATEWAY_URL', 'http://54.237.30.33:5000'),  # custom endpoint URL
)

# Create a function that adds a custom header to the outgoing request.
def add_custom_header_before_call(model, params, request_signer, **kwargs):
    """Inject the bearer token into the headers of an outgoing request.

    Written as a handler for a boto3 client's ``before-call`` event.

    The token is read from the ``BEDROCK_GATEWAY_TOKEN`` environment variable
    each time the handler runs; when the variable is not set, the demo token
    ``'1234567890'`` is used.

    Args:
        model: Supplied by the event system; not used here.
        params: Request dictionary. Its ``'headers'`` mapping is mutated in
            place to carry the ``bearer-token`` header.
        request_signer: Supplied by the event system; not used here.
        **kwargs: Any further event arguments; ignored.

    Returns:
        None. The request is modified through ``params``.
    """
    # Here is where you pass the token.
    params['headers']['bearer-token'] = os.environ.get('BEDROCK_GATEWAY_TOKEN', '1234567890')

# Attach the header-injecting handler to the client's event system. The
# 'before-call.*.*' pattern uses wildcards for the last two segments of the event name.
event_system = bedrock_client.meta.events
event_system.register_first(
    'before-call.*.*',
    add_custom_header_before_call,
)

## Dummy Gateway

For my use case, I created a Flask gateway on EC2 with the following logic:

```
from flask import Flask, request, jsonify                                                                                                                                       
import boto3
from botocore.exceptions import ClientError

# Flask application object; the route handler below is registered on it.
app = Flask(__name__)

# The expected bearer token. It is hardcoded to simulate authentication: a
# request is accepted only when its `bearer-token` header equals this value.
EXPECTED_TOKEN = '1234567890'

@app.route('/model/<model_id>/converse', methods=['POST'])
def converse(model_id):
    """Authenticate the caller, then forward a Converse request to Amazon Bedrock.

    Args:
        model_id: Model identifier captured from the URL path.

    Returns:
        A ``(response, status)`` tuple:
        - 401 when the ``bearer-token`` header is missing or does not match
          ``EXPECTED_TOKEN``;
        - 400 when the request body is not a JSON object;
        - 200 with the Bedrock response on success;
        - the HTTP status reported by Bedrock when it raises a ``ClientError``;
        - 500 for any other exception.
    """
    # Local import so this handler does not depend on a module-level import.
    import hmac

    # Authenticate first, so that payloads from unauthenticated callers are
    # neither parsed nor logged. The token itself is never written to the log.
    bearer_token = request.headers.get('bearer-token')

    # Compare as bytes in constant time; compare_digest rejects non-ASCII str.
    token_ok = bool(bearer_token) and hmac.compare_digest(
        bearer_token.encode('utf-8'), EXPECTED_TOKEN.encode('utf-8')
    )

    # If the token is missing or incorrect, return an unauthorized error
    if not token_ok:
        return jsonify({"error": "You are not authorized to make this call"}), 401

    data = request.json
    print(f"Received request for model: {model_id}")
    print(f"Request data: {data}")

    # The payload is unpacked as keyword arguments below, so it must be a JSON object.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    try:
        # Create a boto3 client for bedrock-runtime
        bedrock_client = boto3.client('bedrock-runtime')

        # Forward the request to Amazon Bedrock
        response = bedrock_client.converse(
            modelId=model_id,
            **data  # Unpack the request data
        )

        # Return the response from the Bedrock API
        return jsonify(response), 200

    except ClientError as e:
        # Relay the service's error message, code and HTTP status to the caller.
        error_message = e.response['Error']['Message']
        error_code = e.response['Error']['Code']
        return jsonify({"error": error_message, "code": error_code}), e.response['ResponseMetadata']['HTTPStatusCode']

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# When this file is executed directly as a script, start the app via `app.run`,
# listening on all network interfaces (0.0.0.0) on port 5000.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
```

The gateway retrieves the bearer token and does a basic match against an expected token (simulating authentication) before proceeding to forward the request to the Amazon Bedrock service.

## Make a call to bedrock runtime using the custom endpoint

If we use a different bearer token above, the gateway rejects the request with a 401 and the call fails with an error, because the gateway logic only accepts its single hardcoded token.

In [29]:
# A single-turn conversation: one user message holding one text block.
content = [{"text": "whats your name"}]
chat_history = [{"role": "user", "content": content}]

# System prompt, given as a list of text blocks.
system_message = [{"text": "Chatty"}]

# Alternative model id: "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
model_id = "us.meta.llama3-1-70b-instruct-v1:0"


# Send the conversation through the custom endpoint.
# To use optimized inference for llama3.1 and Haiku 3.5, uncomment the
# `performanceConfig` argument below. The gateway's Bedrock client must be
# configured with `us-east-2` for this to work, else an error will be thrown.
response = bedrock_client.converse(
    modelId=model_id,
    messages=chat_history,
    system=system_message,
    inferenceConfig={"maxTokens": 2000, "temperature": 0.5},
    # performanceConfig={"latency": "optimized"},
)
response

{'ResponseMetadata': {'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'gunicorn',
   'date': 'Thu, 05 Dec 2024 03:28:12 GMT',
   'connection': 'close',
   'content-type': 'application/json',
   'content-length': '852'},
  'RetryAttempts': 0},
 'output': {'message': {'role': 'assistant',
   'content': [{'text': '\n\nI\'m an AI, and I don\'t have a personal name, but I\'m often referred to as "Assistant" or "Chatbot". If you\'d like, I can generate a nickname for our conversation. Some options could be:\n\n* Chatty (as you\'ve already mentioned)\n* LLaMA (a nod to my language model)\n* Nova (meaning "new" in Latin)\n* or something else entirely!\n\nWhat do you think?'}]}},
 'stopReason': 'end_turn',
 'usage': {'inputTokens': 20, 'outputTokens': 93, 'totalTokens': 113},
 'metrics': {'latencyMs': 3413}}