# Using AWS Bedrock for RAG

In [None]:
from zenml.client import Client
from zenml.service_connectors.service_connector import ServiceConnector


def get_boto_client(
    connector_id: str = "0b04bcae-efc9-4044-a1c2-b86281cb0820",  # TODO: pull this default out into config file
    resource_type: str = "aws-generic",
) -> ServiceConnector:
    """Connect to AWS through a ZenML service connector.

    Args:
        connector_id: Name, ID or ID prefix of the ZenML service connector
            to use. Defaults to the connector this notebook was written
            against; pass your own to run it in another workspace.
        resource_type: Resource type requested from the connector.
            Defaults to "aws-generic".

    Returns:
        The object produced by calling ``.connect()`` on the connector
        client. The rest of this notebook calls
        ``.client(<service>, region_name=...)`` on it.
        NOTE(review): that usage suggests a boto3 session rather than a
        ``ServiceConnector`` -- confirm and tighten the annotation.
    """
    connector_client = Client().get_service_connector_client(
        name_id_or_prefix=connector_id,
        resource_type=resource_type,
    )
    return connector_client.connect()

## Create a collection in OpenSearch Serverless

### First, create a security policy for OpenSearch Serverless (OSSS)

Docs:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/opensearchserverless/client/create_security_policy.html

The AWS documentation describes the prerequisite as follows:

"Before you create a collection using the AWS CLI, you must have an encryption policy with a resource pattern that matches the intended name of the collection. For example, if you plan to name your collection logs-application, you might create an encryption policy like this:

```bash
aws opensearchserverless create-security-policy \
  --name logs-policy \
  --type encryption --policy "{\"Rules\":[{\"ResourceType\":\"collection\",\"Resource\":[\"collection\/logs-application\"]}],\"AWSOwnedKey\":true}"
```

If you plan to use the policy for additional collections, you can make the rule more broad, such as collection/logs* or collection/*."

In [None]:
# Connect to AWS through the ZenML service connector, then build the
# OpenSearch Serverless client from that connection.
aws_connection = get_boto_client()
osss_client = aws_connection.client("opensearchserverless", region_name="us-east-1")

In [None]:
import json

from botocore.exceptions import ClientError

encryption_policy_name = "zenml-rag-encryption-policy"
# The resource pattern has to match the name of the collection created
# further down in this notebook ("zenml-rag-collection-01").
encryption_policy = {
    "Rules": [
        {
            "ResourceType": "collection",
            "Resource": ["collection/zenml-rag-collection-*"],
        }
    ],
    "AWSOwnedKey": True,
}

try:
    encryption_response = osss_client.create_security_policy(
        name=encryption_policy_name,
        type="encryption",
        policy=json.dumps(encryption_policy),
        description="Encryption policy for ZenML RAG collections",
    )
    print(
        f"Encryption policy created: {encryption_response['securityPolicyDetail']['name']}"
    )
except ClientError as e:
    # Only ConflictException means "the policy already exists". Any other
    # ClientError (permissions, validation, throttling, ...) is a real
    # failure and must not be reported as success.
    if e.response["Error"]["Code"] != "ConflictException":
        raise
    print(f"The policy {encryption_policy_name} already exists. No need to create it again.")

### Create the OpenSearch Serverless Collection

Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/opensearchserverless/client/create_collection.html

In [18]:
from typing import Any, Dict, List, Optional


def create_opensearch_collection(
    client: Any,
    name: str,
    description: str,
    type: str = "VECTORSEARCH",
    standby_replicas: str = "ENABLED",
    tags: Optional[List[Dict[str, str]]] = None,
) -> Dict[str, Any]:
    """
    Create an OpenSearch Serverless collection.

    Args:
        client (Any): The boto3 client for OpenSearch Serverless.
        name (str): The name of the collection to create.
        description (str): The description of the collection.
        type (str, optional): The type of the collection. Defaults to "VECTORSEARCH".
        standby_replicas (str, optional): Whether to enable standby replicas. Defaults to "ENABLED".
        tags (Optional[List[Dict[str, str]]], optional): Tags to apply to the
            collection. Defaults to None, which sends an empty tag list.

    Returns:
        Dict[str, Any]: The "createCollectionDetail" entry of the response.

    Raises:
        Exception: Any error raised by ``client.create_collection`` is
            propagated unchanged to the caller.
    """
    # Build a fresh list per call so no list object is shared between
    # invocations (the previous `tags=[]` default was a single shared list).
    request_tags = list(tags) if tags is not None else []
    response = client.create_collection(
        name=name,
        description=description,
        type=type,
        standbyReplicas=standby_replicas,
        tags=request_tags,
    )
    return response["createCollectionDetail"]

In [23]:
# Create an OpenSearch Serverless collection (or reuse it if it already exists)
collection_name = "zenml-rag-collection-01"
collection_description = "ZenML RAG Collection"

try:
    collection_detail = create_opensearch_collection(
        client=osss_client,
        name=collection_name,
        description=collection_description,
    )
    collection_arn = collection_detail["arn"]
    print(f"Collection created successfully. ARN: {collection_arn}")
except ClientError as e:
    # Anything other than "already exists" is a real failure. Re-raise so
    # the notebook stops here instead of hitting an undefined
    # `collection_arn` in the knowledge base cell.
    if e.response["Error"]["Code"] != "ConflictException":
        raise
    print(f"The collection {collection_name} already exists. No need to create it again.")
    # Look the collection up by name; the first entry of an unfiltered
    # listing may be a different collection in the same account.
    existing_collections = osss_client.list_collections(
        collectionFilters={"name": collection_name}
    )["collectionSummaries"]
    matching_arns = [
        summary["arn"]
        for summary in existing_collections
        if summary["name"] == collection_name
    ]
    if not matching_arns:
        raise RuntimeError(
            f"Collection {collection_name} was reported as existing but could not be found."
        ) from e
    collection_arn = matching_arns[0]

The collection zenml-rag-collection-01 already exists. No need to create it again.


## Create a Knowledge Base

In [24]:
from zenml.client import Client

# ARN of the embedding model the knowledge base uses to vectorise documents.
AWS_BEDROCK_TITAN_EMBEDDINGS_ARN = (
    "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v1"
)
kb_name = "zenml-rag-kb"
kb_description = "ZenML RAG Knowledge Base"

brc = get_boto_client().client("bedrock-agent", region_name="us-east-1")

# IAM role passed to Bedrock as `roleArn` for the knowledge base.
kb_role_arn = "arn:aws:iam::339712793861:role/service-role/AmazonBedrockExecutionRoleForKnowledgeBase_96gjm"  # TODO: pull this out into config file

# Vector knowledge base backed by the embedding model above.
knowledge_base_configuration = {
    "type": "VECTOR",
    "vectorKnowledgeBaseConfiguration": {
        "embeddingModelArn": AWS_BEDROCK_TITAN_EMBEDDINGS_ARN,
    },
}

# Store the vectors in the OpenSearch Serverless collection created earlier.
storage_configuration = {
    "type": "OPENSEARCH_SERVERLESS",
    "opensearchServerlessConfiguration": {
        "collectionArn": collection_arn,
        "fieldMapping": {
            "metadataField": "metadata",
            "textField": "text",
            "vectorField": "vector",
        },
        "vectorIndexName": "zenml-rag-vector-index",
    },
}

kb_response = brc.create_knowledge_base(
    name=kb_name,
    description=kb_description,
    roleArn=kb_role_arn,
    knowledgeBaseConfiguration=knowledge_base_configuration,
    storageConfiguration=storage_configuration,
)
kb_response

In [None]:
# Pull the knowledge base ID out of the create response and display it.
knowledge_base_detail = kb_response["knowledgeBase"]
kb_id = knowledge_base_detail["knowledgeBaseId"]
kb_id