In [1]:
!pip install opensearch-py
!pip install boto3
!pip install botocore
!pip install requests-aws4auth



In [3]:
import json
import time

import boto3
import botocore
from opensearchpy import OpenSearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth

In [4]:
# Build the client using the default credential configuration.
# You can use the CLI and run 'aws configure' to set access key, secret
# key, and default region.

service = 'aoss'
region = 'us-east-1'

# One session backs both the control-plane client and the SigV4 signer, and
# the client is pinned to the same region the signer uses. Otherwise the
# client would silently fall back to the profile's default region while
# requests are signed for `region`.
session = boto3.Session()
client = session.client('opensearchserverless', region_name=region)

credentials = session.get_credentials()
if credentials is None:
    # Fail here with a clear message instead of an AttributeError below.
    raise RuntimeError(
        "No AWS credentials found. Run 'aws configure' or attach a role first.")
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                   region, service, session_token=credentials.token)

In [5]:
def createEncryptionPolicy(client):
    """Create the 'tv-policy' encryption policy.

    The policy document applies to every collection ("collection/*") and
    selects an AWS-owned key. On success the service response is printed.
    If the service answers with a ConflictException a notice is printed
    instead; any other ClientError is re-raised to the caller.
    """
    # The document is passed to the service verbatim as a JSON string.
    policy_document = """
                {
                    "Rules":[
                        {
                            "ResourceType":"collection",
                            "Resource":[
                                "collection/*"
                            ]
                        }
                    ],
                    "AWSOwnedKey":true
                }
                """
    try:
        result = client.create_security_policy(
            name='tv-policy',
            type='encryption',
            description='Encryption policy for TV collections',
            policy=policy_document,
        )
    except botocore.exceptions.ClientError as error:
        # Only a conflict is tolerated; everything else goes back to the caller.
        if error.response['Error']['Code'] != 'ConflictException':
            raise
        print('[ConflictException] The policy name or rules conflict with an existing policy.')
    else:
        print('\nEncryption policy created:')
        print(result)




In [6]:
# Create the encryption policy with the boto3 client built earlier in the notebook.
createEncryptionPolicy(client)

ClientError: An error occurred (ExpiredTokenException) when calling the CreateSecurityPolicy operation: The security token included in the request is expired

In [None]:
def createNetworkPolicy(client):
    """Create the 'tv-policy' network policy.

    The policy document allows public access to every collection
    ("collection/*") and its dashboard. On success the service response is
    printed. If the service answers with a ConflictException a notice is
    printed instead; any other ClientError is re-raised to the caller.
    """
    # The document is passed to the service verbatim as a JSON string.
    policy_document = """
                [{
                    "Description":"Public access for TV collection",
                    "Rules":[
                        {
                            "ResourceType":"dashboard",
                            "Resource":["collection/*"]
                        },
                        {
                            "ResourceType":"collection",
                            "Resource":["collection/*"]
                        }
                    ],
                    "AllowFromPublic":true
                }]
                """
    try:
        result = client.create_security_policy(
            name='tv-policy',
            type='network',
            description='Network policy for TV collections',
            policy=policy_document,
        )
    except botocore.exceptions.ClientError as error:
        # Only a conflict is tolerated; everything else goes back to the caller.
        if error.response['Error']['Code'] != 'ConflictException':
            raise
        print('[ConflictException] A network policy with this name already exists.')
    else:
        print('\nNetwork policy created:')
        print(result)



In [None]:
# Create the network policy with the boto3 client built earlier in the notebook.
createNetworkPolicy(client)

In [None]:

def createAccessPolicy(client, principals=None):
    """Create the 'tv-policy' data access policy.

    The policy grants index-level permissions on every index of every
    collection ("index/*/*") and collection-level permissions on every
    collection ("collection/*") to the given principals.

    The document is assembled as Python data and serialized with
    ``json.dumps`` so that it is always valid JSON. The previous hand-written
    string was missing a closing quote after "aoss:DeleteCollectionItems"
    and therefore was not parseable.

    Args:
        client: boto3 'opensearchserverless' client (or any object exposing
            ``create_access_policy`` with the same keyword arguments).
        principals: IAM principal ARN, or an iterable of ARNs, to grant
            access to. Defaults to the SageMaker execution role this
            notebook was originally written for.

    Returns:
        None. The service response is printed on success.

    Raises:
        botocore.exceptions.ClientError: for any service error other than
            ConflictException, which is reported with a printed notice.
    """
    if principals is None:
        principals = [
            "arn:aws:iam::794038231401:role/service-role/SageMaker-ExecutionRole-20250724T161782"
        ]
    elif isinstance(principals, str):
        # Accept a single ARN for convenience.
        principals = [principals]

    policy_document = [{
        "Rules": [
            {
                "Resource": ["index/*/*"],
                "Permission": [
                    "aoss:CreateIndex",
                    "aoss:DeleteIndex",
                    "aoss:UpdateIndex",
                    "aoss:DescribeIndex",
                    "aoss:ReadDocument",
                    "aoss:WriteDocument",
                ],
                "ResourceType": "index",
            },
            {
                "Resource": ["collection/*"],
                "Permission": [
                    "aoss:CreateCollectionItems",
                    "aoss:DeleteCollectionItems",
                    "aoss:UpdateCollectionItems",
                    "aoss:DescribeCollectionItems",
                ],
                "ResourceType": "collection",
            },
        ],
        "Principal": list(principals),
    }]

    try:
        response = client.create_access_policy(
            description='Data access policy for TV collections',
            name='tv-policy',
            policy=json.dumps(policy_document),
            type='data'
        )
        print('\nAccess policy created:')
        print(response)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print(
                '[ConflictException] An access policy with this name already exists.')
        else:
            raise error

In [None]:
# Show the identity the AWS CLI is currently authenticated as.
# NOTE(review): presumably used to check the Principal ARN in the data access policy - confirm.
!aws sts get-caller-identity

In [None]:
# Create the data access policy with the boto3 client built earlier in the notebook.
createAccessPolicy(client)

In [None]:
def createCollection(client, name='tv-sitcoms2', collection_type='VECTORSEARCH'):
    """Create an OpenSearch Serverless collection.

    Args:
        client: boto3 'opensearchserverless' client (or any object exposing
            ``create_collection`` with the same keyword arguments).
        name: Name of the collection to create.
        collection_type: Collection type passed to the service as ``type``.

    Returns:
        The service response, or None when a collection with this name
        already exists (a notice is printed in that case).

    Raises:
        botocore.exceptions.ClientError: for any service error other than
            ConflictException.
    """
    try:
        return client.create_collection(name=name, type=collection_type)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print(
                '[ConflictException] A collection with this name already exists. Try another name.')
            return None
        raise error

def waitForCollectionCreation(client, name='tv-sitcoms2', poll_interval=30):
    """Block until the named collection has left the CREATING state.

    The default name matches the collection that ``createCollection``
    creates. The previous version polled 'tv-sitcoms', which is not the
    name used at creation time.

    Args:
        client: boto3 'opensearchserverless' client (or any object exposing
            ``batch_get_collection(names=[...])``).
        name: Name of the collection to wait for.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        None. The final collection details are printed.

    Raises:
        LookupError: if the service returns no details for ``name``.
        RuntimeError: if the collection stops creating in any state other
            than ACTIVE (for example FAILED).
    """
    def _describe():
        # An unknown name yields an empty list; report that clearly instead
        # of failing with a bare IndexError on details[0].
        details = client.batch_get_collection(names=[name]).get('collectionDetails', [])
        if not details:
            raise LookupError("No collection named '%s' was found." % name)
        return details

    details = _describe()
    # Periodically check collection status
    while details[0]['status'] == 'CREATING':
        print('Creating collection...')
        time.sleep(poll_interval)
        details = _describe()

    status = details[0]['status']
    if status != 'ACTIVE':
        # Do not report success for a collection that ended up FAILED/DELETING.
        raise RuntimeError(
            "Collection '%s' finished in state %s instead of ACTIVE." % (name, status))

    print('\nCollection successfully created:')
    print(details)

In [None]:
# Create the collection; the value returned by createCollection is shown as the cell output.
createCollection(client)

In [None]:
# Poll the collection status until it is no longer CREATING, then print its details.
waitForCollectionCreation(client)

In [None]:
from opensearchpy import OpenSearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import boto3

# RequestsHttpConnection is the class imported from opensearchpy; it is passed
# as connection_class=RequestsHttpConnection when the OpenSearch client is built.

# Value for awsauth
# This is constructed using your AWS credentials
session = boto3.Session()
credentials = session.get_credentials()
if credentials is None:
    # Fail here with a clear message instead of an AttributeError below.
    raise RuntimeError(
        "No AWS credentials found. Run 'aws configure' or attach a role first.")
region = 'us-east-1'  # Extracted from your collection ARN
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    'aoss',
    session_token=credentials.token
)

# Value for host
# Extracted from your collectionEndpoint by removing 'https://'
host = '5e0cxl565t1a2zir5ptg.us-east-1.aoss.amazonaws.com'

In [None]:
# Initialize OpenSearch client
os_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=300
)

# Define index name and body (matches console configs)
index_name = 'main-index'
index_body = {
    "settings": {
        "index.knn": True
    },
    "mappings": {
        "properties": {
            "embeddings": {  # Vector field
                "type": "knn_vector",
                "dimension": 1024,  # Adjust based on model (e.g., 1024 for Cohere)
                "method": {
                    "name": "hnsw",
                    "space_type": "l2",  # Euclidean distance
                    "engine": "faiss",
                    "parameters": {
                        "m": 16,
                        "ef_construction": 128
                    }
                }
            },
            "text": {  # Chunk text field, filterable (indexed)
                "type": "text"
            },
            "bedrock-metadata": {  # Metadata field, not filterable (not indexed)
                "type": "text",
                "index": False
            }
        }
    }
}

# Create the index only when it does not exist yet, so this cell can be
# re-run (Restart & Run All) without failing on an already-existing index.
if os_client.indices.exists(index=index_name):
    print('Index already exists:', index_name)
else:
    response = os_client.indices.create(index=index_name, body=index_body)
    print('Index created:', response)