In [2]:
import os
import boto3

# Set the AWS profile
os.environ['AWS_PROFILE'] = 'ccbd-joseph'

# Now you can use boto3 to interact with AWS services
# For example, creating an S3 client
s3 = boto3.client('s3')

# List buckets
response = s3.list_buckets()
for bucket in response['Buckets']:
    print(f'Bucket Name: {bucket["Name"]}')


Bucket Name: cf-templates-1g550bcpoyriy-us-east-1
Bucket Name: cf-templates-1g550bcpoyriy-us-east-2
Bucket Name: dinnerconcierge-jg4376
Bucket Name: photo-album-assets
Bucket Name: photos-frontend-jg4376
Bucket Name: photos-jg4376
Bucket Name: sagemaker-studio-561013435358-e1wss47m5oa
Bucket Name: sagemaker-studio-561013435358-ezvq1b3ry
Bucket Name: sagemaker-us-east-1-561013435358
Bucket Name: skinfix-frontend
Bucket Name: skinfix-ml-bucket
Bucket Name: skinfix-patient-photos
Bucket Name: skinfix-train-data
Bucket Name: voice-files-all


In [None]:
from opensearchpy import OpenSearch, RequestsHttpConnection, helpers
from requests.auth import HTTPBasicAuth

# Master user credentials
master_username = 'ccbdgroup'
master_password = 'CCBDGroup1!'

# OpenSearch client initialization with master user authentication
opensearch = OpenSearch(
    hosts=[{'host': 'search-doctors-domain-jhukjvozwcmrtu4a3ngi6umdc4.us-east-1.es.amazonaws.com', 'port': 443}],
    http_auth=HTTPBasicAuth(master_username, master_password),
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=60  # Increase timeout to 30 seconds
)

In [13]:
# Initialize clients for DynamoDB and OpenSearch
dynamodb = boto3.resource('dynamodb')


# DynamoDB table
table = dynamodb.Table('doctorsInfo')

# Scan DynamoDB table (consider pagination for large tables)
response = table.scan()
items = response['Items']

# Process records and create mapping
focus_mapping = {}
for item in items:
    doctor_id = item['doctorId']
    focus_areas = [_f.lower() for _f in item['focus']]
    for focus in focus_areas:
        if focus not in focus_mapping:
            focus_mapping[focus] = []

        focus_mapping[focus].append(doctor_id)

# Prepare data for OpenSearch indexing
actions = [
    {
        "_index": "doctor-focus-areas",
        "_id": focus,
        "_source": {
            "focus_area": focus,
            "doctor_ids": list(set(focus_mapping[focus]))
        }
    }
    for focus in focus_mapping
]

# Bulk index data into OpenSearch
helpers.bulk(opensearch, actions)


(213, [])

In [15]:
# Define the index name
index_name = "doctor-focus-areas"

# Define the search query
search_query = {
    "query": {
        "match": {
            "focus_area": "moles"
        }
    }
}

# Perform the search
try:
    search_response = opensearch.search(index=index_name, body=search_query)
    print("Search Response:", search_response)

    # Extracting doctor IDs from the search response
    doctor_ids = []
    for hit in search_response['hits']['hits']:
        doctor_ids.extend(hit['_source']['doctor_ids'])

    # Removing potential duplicates
    doctor_ids = list(set(doctor_ids))

    print("Doctor IDs:", doctor_ids)

except Exception as e:
    print("Error performing search:", e)


Search Response: {'took': 1873, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 4.188632, 'hits': [{'_index': 'doctor-focus-areas', '_id': 'moles', '_score': 4.188632, '_source': {'focus_area': 'moles', 'doctor_ids': ['sin_0000072500105319868991', 'sin_0000072500029420988831', 'sin_0000072500001497120562', 'sin_0000072500029420986821', 'sin_0000072500064423620231', 'sin_0000072500089092733181', 'sin_0000072500066008379221', 'sin_0000072500001497166942', 'sin_0000072500104670182191', 'sin_0000072500095611143331']}}, {'_index': 'doctor-focus-areas', '_id': 'nevi (moles)', '_score': 3.4732242, '_source': {'focus_area': 'nevi (moles)', 'doctor_ids': ['sin_0000072500105319868991', 'sin_0000072500001497211182', 'sin_0000072500001497120562', 'sin_0000072500064423620231', 'sin_0000072500001497166942', 'sin_0000072500104670182191', 'sin_0000072500095611143331']}}]}}
Doctor IDs: ['sin_00000725

In [16]:
try:
    response = opensearch.delete_by_query(
        index=index_name,
        body={
            "query": {
                "match_all": {}  # Matches all documents
            }
        }
    )
    print("All documents deleted:", response)
except Exception as e:
    print("Error deleting documents:", e)

All documents deleted: {'took': 5232, 'timed_out': False, 'total': 213, 'deleted': 213, 'batches': 1, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []}
