# OpenSearch demo

Enter your AWS region and OpenSearch host name below. The host name must be given without the `https://` prefix.

In [151]:
# Connection settings read by the cells below.
# `aos_host` is the bare host name only -- do NOT include the "https://" scheme.
aos_host = "XXXXXXXXXXXXXXXX.us-east-1.aoss.amazonaws.com"
region = "us-east-1"

In [152]:
!pip install opensearch-py pandas --quiet

In [153]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
import boto3
import json

# Obtain credentials from a default boto3 session and build a SigV4 signer for
# the configured region; "aoss" is the service name passed to the signer.
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region, service="aoss")

# Client for the configured host: HTTPS on port 443 with certificate
# verification enabled, every request signed by `auth`.
aos_client = OpenSearch(
    hosts=[{"host": aos_host, "port": 443}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

In [154]:
# Index definition for the k-NN demo: index settings (k-NN enabled, default
# analyzer) plus the field mappings.
knn_index = {
    "aliases": {},
    "settings": {
        "index.knn": True,
        "index.knn.algo_param.ef_search": 512,
        "analysis": {
            "analyzer": {
                "default": {"type": "standard", "stopwords": "_english_"},
            },
        },
    },
    "mappings": {
        "properties": {
            # Every descriptive column gets the same mapping: a stored text field.
            # The tuple order fixes the key order of the resulting mapping.
            **{
                field: {"type": "text", "store": True}
                for field in (
                    "title",
                    "year",
                    "rating",
                    "runtime",
                    "genres",
                    "imdb_rating",
                    "overview",
                )
            },
            # 1536-dimensional vector field using the nmslib engine with the
            # "hnsw" method and the "cosinesimil" space type.
            "overview_vector": {
                "type": "knn_vector",
                "dimension": 1536,
                "method": {
                    "engine": "nmslib",
                    "space_type": "cosinesimil",
                    "name": "hnsw",
                    "parameters": {"ef_search": 512},
                },
            },
        },
    },
}

In [155]:
# Create the "knn_demo" index from `knn_index`. ignore=400 is passed to the
# client -- presumably so that re-running this cell when the index already
# exists returns the error body instead of raising (TODO confirm: any other
# HTTP 400, e.g. an invalid mapping, would be tolerated the same way).
aos_client.indices.create(
    index="knn_demo",
    body=knn_index,
    ignore=400,
)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'knn_demo'}

Load the data into the index. Each row needs its own embedding request, so this step takes about 9 minutes.

In [157]:
#Load data from a CSV file called "imdb_top_1000.csv"
import csv
import numpy as np

def load_data(filename, encoding="utf-8"):
    """Read a CSV file and return all of its rows.

    Args:
        filename: Path of the CSV file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale.

    Returns:
        A list with one entry per CSV row, each entry being a list of the
        row's field strings. Nothing is skipped, so a header row (if the
        file has one) is the first entry. An empty file yields ``[]``.
    """
    # newline="" is what the csv module expects for file objects: without it,
    # line breaks embedded inside quoted fields are translated by the text
    # layer before the reader sees them.
    with open(filename, "r", encoding=encoding, newline="") as f:
        return list(csv.reader(f))

# Read every row of the CSV and drop the first one, which is the header.
# Slicing (instead of `del result[0]`) also copes with an empty file.
result = load_data("imdb_top_1000.csv")[1:]

# Create the Bedrock runtime client used to compute the embeddings.
bedrock_runtime = boto3.client("bedrock-runtime")

for item in result:
    # Build the embedding request for column 6 of the row, which is the value
    # stored below as "overview".
    body = json.dumps({
        "inputText": item[6]
    })

    # The response gets its own name so that `result` (the list of rows being
    # iterated) is never rebound to a different kind of object.
    embed_response = bedrock_runtime.invoke_model(
        body=body,
        accept="application/json",
        contentType="application/json",
        modelId="amazon.titan-embed-text-v1",
    )

    response_body = json.loads(embed_response['body'].read())
    embedding = response_body.get("embedding")

    # Index the row together with its embedding into OpenSearch.
    aos_client.index(
        index="knn_demo",
        body={
            "title": item[0],
            "year": item[1],
            "rating": item[2],
            "runtime": item[3],
            "genres": item[4],
            "imdb_rating": item[5],
            "overview": item[6],
            "overview_vector": embedding,
        },
    )


In [164]:
import pandas as pd

# Free-text description to search for.
text_query = "A father travels across the reaches of space and time to save his family"
# Alternative query in German; uncomment to try a non-English description.
#text_query = "Ein Workaholic-Prominenter aus New York City zieht aufs Land und verliebt sich in einen Mann, der auf der Farm seiner Familie arbeitet"

# Turn the query text into an embedding using the "amazon.titan-embed-text-v1" model.
body_query = json.dumps({"inputText": text_query})
result_query = bedrock_runtime.invoke_model(
    modelId="amazon.titan-embed-text-v1",
    contentType="application/json",
    accept="application/json",
    body=body_query,
)
response_query = json.loads(result_query["body"].read())
embedding_query = response_query.get("embedding")

# k-NN query on the "overview_vector" field with k set to 10.
query = {
    "query": {
        "knn": {
            "overview_vector": {"vector": embedding_query, "k": 10},
        },
    },
}

# Only the listed stored fields are requested for each hit.
res = aos_client.search(
    index="knn_demo",
    body=query,
    stored_fields=["title", "year", "overview"],
)
print("Got %d Hits:" % res['hits']['total']['value'])

# One [score, title, overview] row per hit; the first element of each
# returned field value is used.
query_result = [
    [hit["_score"], hit["fields"]["title"][0], hit["fields"]["overview"][0]]
    for hit in res["hits"]["hits"]
]

df = pd.DataFrame(query_result, columns=["score", "title", "overview"])
display(df)

Got 40 Hits:


Unnamed: 0,score,title,overview
0,0.711054,God's Own Country,Spring. Yorkshire. Young farmer Johnny Saxby n...
1,0.707339,The Visitor,A college professor travels to New York City t...
2,0.697947,Days of Heaven,A hot-tempered farm laborer convinces the woma...
3,0.695339,The Notebook,A poor yet passionate young man falls in love ...
4,0.693337,Trois couleurs: Blanc,"After his wife divorces him, a Polish immigran..."
5,0.692678,Jab We Met,A depressed wealthy businessman finds his life...
6,0.689324,Victoria,A young Spanish woman who has recently moved t...
7,0.688509,The Lives of Others,"In 1984 East Berlin, an agent of the secret po..."
8,0.687855,Midnight Cowboy,A naive hustler travels from Texas to New York...
9,0.687223,Zindagi Na Milegi Dobara,Three friends decide to turn their fantasy vac...
