In [None]:
import rich, json


def print_json(data):
    """Pretty-print ``data`` as JSON using ``rich``.

    Args:
        data: The object to display. Values the ``json`` module cannot encode
            natively (for example ``datetime`` objects) are rendered with
            ``str()`` instead of raising ``TypeError``.
    """
    # default=str keeps the helper usable on payloads holding non-JSON types.
    rich.print_json(json.dumps(data, default=str))

In [None]:
import boto3

# Print the installed boto3 version so the environment used for this run is
# visible alongside the notebook outputs.
print(boto3.__version__)

1.35.76


In [42]:
import boto3

# Region shared by the Bedrock clients in this notebook.
default_region = "us-east-1"

# Create the Bedrock (control-plane) client in that region so the models
# listed here come from the same region the runtime client invokes them in.
bedrock = boto3.client("bedrock", region_name=default_region)

# List the foundation models published by Meta.
models = bedrock.list_foundation_models(
    byProvider="Meta",
)

meta_models = [
    model["modelId"]
    for model in models["modelSummaries"]
    if "meta" in model["modelId"].lower()
]
# Backward-compatible alias: this list was previously (mis)named `nova_models`.
nova_models = meta_models
print("meta models found:", meta_models)

meta models found: ['meta.llama3-8b-instruct-v1:0', 'meta.llama3-70b-instruct-v1:0', 'meta.llama3-1-8b-instruct-v1:0:128k', 'meta.llama3-1-8b-instruct-v1:0', 'meta.llama3-1-70b-instruct-v1:0:128k', 'meta.llama3-1-70b-instruct-v1:0', 'meta.llama3-1-405b-instruct-v1:0', 'meta.llama3-2-11b-instruct-v1:0', 'meta.llama3-2-90b-instruct-v1:0', 'meta.llama3-2-1b-instruct-v1:0', 'meta.llama3-2-3b-instruct-v1:0']


In [46]:
from botocore.exceptions import ClientError

# Set the model ID.
model_id = "meta.llama3-8b-instruct-v1:0"

# Set the prompt.
prompt = "Describe the purpose of a 'hello world' program in one line."

# Create a Bedrock Runtime client in the AWS Region you want to use.
bedrock_runtime = boto3.client("bedrock-runtime", region_name=default_region)

# Embed the prompt in Llama 3's instruction format.
# More information: https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
formatted_prompt = f"""
<|begin_of_text|>
<|start_header_id|>user<|end_header_id|>
{prompt}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

# Format the request payload using the model's native structure.
native_request = {
    "prompt": formatted_prompt,
    "max_gen_len": 512,
    "temperature": 0.5,
}

# Convert the native request to JSON.
request = json.dumps(native_request)

try:
    # Invoke the model with the request.
    response = bedrock_runtime.invoke_model(modelId=model_id, body=request)

    # Decode the response body.
    model_response = json.loads(response["body"].read())

    # Extract and print the response text.
    response_text = model_response["generation"]
    print(response_text)

except Exception as e:
    # ClientError is a subclass of Exception, so one clause covers both.
    # Report the failure and re-raise: calling exit(1) inside a notebook asks
    # the kernel to shut down and hides the traceback.
    print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
    raise

A "Hello, World!" program is a simple computer program that prints the text "Hello, World!" to the screen, serving as a traditional starting point for learning programming languages and testing the basic functionality of a programming environment.


In [None]:
def send_message_to_model(
    conversation,
    model_id=model_id,
    max_tokens=512,
    temperature=0.5,
    top_p=0.9,
    system_prompt="You are a helpful assistant focused in Meta Llama models",
):
    """
    Send a conversation to a model through the Converse API, print the first
    text block of the reply, and return the full response.

    Uses the module-level ``bedrock_runtime`` client.

    Args:
        conversation (list): The conversation history/messages to send to the model.
        model_id (str): The ID of the model to use. The default is the value the
            global ``model_id`` had when this function was defined; reassigning
            the global later does not change it.
        max_tokens (int): Maximum number of tokens to generate in the response.
        temperature (float): Sampling temperature to control randomness.
        top_p (float): Nucleus sampling parameter to control the range of token sampling.
        system_prompt (str): System prompt to guide the model's behavior.

    Returns:
        dict: The response from the model, containing the generated text and additional metadata.

    Raises:
        Exception: Any error raised while calling the model or reading the
            response is printed and then re-raised unchanged.
    """
    try:
        # Send the message to the model, using the provided inference configuration.
        response = bedrock_runtime.converse(
            modelId=model_id,
            messages=conversation,
            inferenceConfig={
                "maxTokens": max_tokens,
                "temperature": temperature,
                "topP": top_p,
            },
            system=[{"text": system_prompt}],
        )

        # Extract and print the response text.
        print(response["output"]["message"]["content"][0]["text"])
        return response

    except Exception as e:
        # ClientError is a subclass of Exception, so one clause covers both.
        # Re-raise instead of calling exit(1): a helper function should not
        # terminate the notebook kernel, and callers keep the traceback.
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        raise

In [None]:
# Baseline settings for a single-turn Converse request.
model_id = "meta.llama3-8b-instruct-v1:0"
system_prompt = "You are a helpful assistant. Be concise and only respond with the answer to the question. Focus only in questions related to Tesla Cars."

# The user turn, in the Converse message format.
message = {
    "role": "user",
    "content": [{"text": "What is a good fit for a family of 4 human and three cats?"}],
}

# The conversation holds only that one user message.
conversation = [message]

response = send_message_to_model(
    conversation=conversation,
    model_id=model_id,
    system_prompt=system_prompt,
)

# Leave the raw response as the cell's last expression so it is displayed.
response



The Tesla Model X or Model S would be a good fit for a family of 4 humans and three cats. The Model X offers more cargo space and fold-flat second-row seats, making it easier to accommodate pet carriers or strollers. The Model S also has ample cargo space and a comfortable ride.


{'ResponseMetadata': {'RequestId': 'd7062af4-83a4-4e0d-b5ce-ddaa0292397b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 09 Dec 2024 01:51:56 GMT',
   'content-type': 'application/json',
   'content-length': '465',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'd7062af4-83a4-4e0d-b5ce-ddaa0292397b'},
  'RetryAttempts': 0},
 'output': {'message': {'role': 'assistant',
   'content': [{'text': '\n\nThe Tesla Model X or Model S would be a good fit for a family of 4 humans and three cats. The Model X offers more cargo space and fold-flat second-row seats, making it easier to accommodate pet carriers or strollers. The Model S also has ample cargo space and a comfortable ride.'}]}},
 'stopReason': 'end_turn',
 'usage': {'inputTokens': 57, 'outputTokens': 63, 'totalTokens': 120},
 'metrics': {'latencyMs': 709}}

In [None]:
import logging

# Module-level logger used by stream_conversation() below.
logger = logging.getLogger(__name__)
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)


def stream_conversation(
    bedrock_client,
    model_id,
    messages,
    system_prompts,
    inference_config,
    additional_model_fields,
):
    """
    Sends messages to a model and streams the response to stdout.

    Prints the role when the message starts, each text chunk as it arrives,
    the stop reason, and finally token usage and latency when the stream
    reports them.

    Args:
        bedrock_client: The Boto3 Bedrock runtime client.
        model_id (str): The model ID to use.
        messages (JSON) : The messages to send.
        system_prompts (JSON) : The system prompts to send.
        inference_config (JSON) : The inference configuration to use.
        additional_model_fields (JSON) : Additional model fields to use.

    Returns:
        None
    """

    logger.info("Streaming messages with model %s", model_id)

    response = bedrock_client.converse_stream(
        modelId=model_id,
        messages=messages,
        system=system_prompts,
        inferenceConfig=inference_config,
        additionalModelRequestFields=additional_model_fields,
    )

    stream = response.get("stream")
    if not stream:
        # Nothing to stream back.
        return

    for event in stream:
        if "messageStart" in event:
            print(f"\nRole: {event['messageStart']['role']}")

        if "contentBlockDelta" in event:
            # A delta is not guaranteed to carry text (e.g. tool-use deltas),
            # so skip chunks without a "text" key instead of raising KeyError.
            text = event["contentBlockDelta"]["delta"].get("text")
            if text:
                # Flush so each chunk shows up as soon as it is received.
                print(text, end="", flush=True)

        if "messageStop" in event:
            print(f"\nStop reason: {event['messageStop']['stopReason']}")

        if "metadata" in event:
            metadata = event["metadata"]
            if "usage" in metadata:
                usage = metadata["usage"]
                print("\nToken usage")
                print(f"Input tokens: {usage['inputTokens']}")
                print(f"Output tokens: {usage['outputTokens']}")
                print(f"Total tokens: {usage['totalTokens']}")
            if "metrics" in metadata:
                print(f"Latency: {metadata['metrics']['latencyMs']} ms")

In [None]:
# basicConfig() does nothing once the root logger already has handlers (it was
# configured earlier in this notebook), so force=True is required for this
# format to actually take effect.
logging.basicConfig(
    level=logging.INFO, format="%(levelname)s: %(message)s", force=True
)

system_prompt = """You are an app that creates playlists for a radio station
  that plays rock and pop music. Only return song names and the artist."""

# Message to send to the model.
input_text = "Create a list of 3 pop songs."

message = {"role": "user", "content": [{"text": input_text}]}
conversation = [message]

# System prompts.
system_prompts = [{"text": system_prompt}]

# inference parameters to use.
temperature = 0.5

# Base inference parameters.
inference_config = {"temperature": temperature}

# Additional model inference parameters.
additional_model_fields = {}

try:
    # Pin the client to the same region as the other runtime client in this
    # notebook; without region_name boto3 falls back to the ambient default
    # region, where model access may not have been granted.
    bedrock_client = boto3.client(
        service_name="bedrock-runtime", region_name=default_region
    )

    stream_conversation(
        bedrock_client,
        model_id,
        conversation,
        system_prompts,
        inference_config,
        additional_model_fields,
    )

except ClientError as err:
    # Use a dedicated name so the `message` dict defined above is not
    # overwritten with a string.
    error_message = err.response["Error"]["Message"]
    logger.error("A client error occurred: %s", error_message)
    print(f"A client error occured: {error_message}")

else:
    print(f"Finished streaming messages with model {model_id}.")

INFO:__main__:Streaming messages with model meta.llama3-8b-instruct-v1:0
ERROR:__main__:A client error occurred: You don't have access to the model with the specified model ID.


A client error occured: You don't have access to the model with the specified model ID.
