In [1]:
import json
import requests
import json
import base64
from PIL import Image
from io import BytesIO
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from requests_aws4auth import AWS4Auth
from requests.auth import HTTPBasicAuth
from botocore.exceptions import ClientError
from os import listdir, walk
import boto3

In [2]:
# image to base64
def image_to_base64(image_path):
    with Image.open(image_path) as img:
        # 创建一个字节流对象
        buffered = BytesIO()
        # 将图片保存到字节流中，并指定格式 (例如 'JPEG', 'PNG')
        img.save(buffered, format=img.format)
        # 获取字节数据并进行 Base64 编码
        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        return img_base64

In [3]:
def image_resize(base64_image_data,width,height):
        try:
            image_data = base64.b64decode(base64_image_data)
            # 将二进制数据转换为 Pillow 支持的 Image 对象
            image = Image.open(BytesIO(image_data))
            # 调整图片大小到 320x320
            resized_image = image.resize((width, height))

            # 将调整后的图片直接转换为 Base64 编码
            buffer = BytesIO()
            resized_image.save(buffer, format=image.format)  # 使用原始格式保存到内存
            resized_base64_data = base64.b64encode(buffer.getvalue()).decode()  # 转为 Base64 字符串
            return resized_base64_data
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error in resize image: {str(e)}")

# Nova generate description test

In [49]:
# 描述信息生成函数
def enrich_image_desc(image_base64):
    client = boto3.client(
        "bedrock-runtime"
    )

    # Set the model ID, e.g., Titan Text Premier.
    # anthropic.claude-3-haiku-20240307-v1:0
    # anthropic.claude-3-5-sonnet-20240620-v1:0
    model_id = 'amazon.nova-pro-v1:0'

    # Start a conversation with the user message.
    user_message = """
        You will be analyzing an image and extracting its key features, including tags, and providing a brief summary of the image content.

        First, carefully examine the image provided in {$IMAGE}.

        Then, in Markdown format, provide the following:

        1. **Tags**: List the key tags that describe the main elements and subjects in the image.
        2. **Summary**: Write a concise 1-2 sentence summary describing the overall content and meaning of the image.

        Format your response as follows:

        # Image Analysis

        ## Tags
        - Tag 1
        - Tag 2
        - Tag 3

        ## Summary
        A brief 1-2 sentence summary of the image content.

        Provide your response within <result> tags.
    """
    
    image_base64 = image_resize(image_base64,320,320)


    if model_id=='anthropic.claude-3-5-sonnet-20240620-v1:0':
        body = json.dumps(
            {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 5000,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": "image/jpeg",
                                    "data": image_base64,
                                },
                            },
                            {"type": "text", "text": user_message},
                        ],
                    }
                ],
                
            }
        )
    else:
        print("in")
        body = json.dumps(
            {
                "schemaVersion": "messages-v1",
                "inferenceConfig": {"max_new_tokens": 5000},
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "image": {
                                    "format": "jpg",
                                    "source": {"bytes": image_base64},
                                }
                            },
                            {
                                "text": user_message
                            }
                        ],
                    }
                ]
            }
        )
        
    try:
        # Send the message to the model, using a basic inference configuration.
        response = client.invoke_model(
            modelId=model_id,
            body=body
        )

        # Extract and print the response text.
        response_body = json.loads(response.get("body").read())
        print(response_body)
        return response_body

    except (ClientError, Exception) as e:
        print(str(e))
        # return(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        # exit(1)
        raise e

In [50]:
img_base64 = image_to_base64("/Users/fangyili/Downloads/搜图10.19/元素图相同，款式不同/期望搜索图一.jpg")
img_base64 = image_resize(img_base64,320,320)
response_body = enrich_image_desc(img_base64)
description = response_body['output']['message']['content'][0]['text']

in
{'output': {'message': {'content': [{'text': '<result>\n\n# Image Analysis\n\n## Tags\n- Butterfly\n- Mouse pad\n- Computer mouse\n- Desk\n- Design\n\n## Summary\nAn image showcasing a wooden desk with a computer monitor, keyboard, and a mouse pad featuring a butterfly design. A white computer mouse is placed on the mouse pad.\n\n</result>'}], 'role': 'assistant'}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 1472, 'outputTokens': 58, 'totalTokens': 1530, 'cacheReadInputTokenCount': None, 'cacheWriteInputTokenCount': None}}


# embedding test

In [53]:
description = response_body['output']['message']['content'][0]['text']
embedding_body = json.dumps({
    # "inputText": description,
    "inputImage": img_base64,
    "embeddingConfig": {
        "outputEmbeddingLength": 1024
    }
})
bedrock_runtime = boto3.client("bedrock-runtime")
response = bedrock_runtime.invoke_model(
    body=embedding_body,
    modelId='amazon.titan-embed-image-v1',
    accept="application/json",
    contentType="application/json"
)
print(response)
embedding_json = json.loads(response['body'].read().decode('utf-8'))
print(embedding_json["embedding"][0])
print(len(embedding_json["embedding"]))

{'ResponseMetadata': {'RequestId': '7a8e4a61-8f83-40c6-bd58-bb3df5a761a3', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 04 Dec 2024 12:29:55 GMT', 'content-type': 'application/json', 'content-length': '12783', 'connection': 'keep-alive', 'x-amzn-requestid': '7a8e4a61-8f83-40c6-bd58-bb3df5a761a3', 'x-amzn-bedrock-invocation-latency': '89', 'x-amzn-bedrock-input-token-count': '0'}, 'RetryAttempts': 0}, 'contentType': 'application/json', 'body': <botocore.response.StreamingBody object at 0x10b508700>}
0.02035558
1024


# Test nova rerank

In [68]:
query_text = 'find a phone case with similar pattern'
prompt = f"""You are an AI assistant tasked with identifying the cell in an image that best matches a user's query. The image shows two parts: on the left is the query image, and on the right is a grid of numbered cells.

Your goal is to review the user's query ({query_text}) and both images, then identify the cell from the right grid that best matches the query image and text. You should provide your response in JSON format, with the following structure:

[
  {{
    "imageIndexNo": "the index number of the matching cell",
    "reason": "a brief explanation of why this cell is the best match"
  }}
]

To accomplish this task:

1. Carefully review the user's query ({query_text}) and the query image on the left.
2. Examine the grid on the right and identify the cell that best matches based on visual features and the query.
3. Determine the index number of the matching cell.
4. Write a brief explanation of why this cell is the best match.
5. Format your response in the JSON structure specified above.

Be as specific and accurate as possible in your response. If you are unable to identify a clear match, explain why in your response."""
image_base64 = image_to_base64("/Users/fangyili/Downloads/Picture1.png")
body = json.dumps(
    {
        "schemaVersion": "messages-v1",
        "inferenceConfig": {"max_new_tokens": 1024},
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "image": {
                            "format": "png",
                            "source": {"bytes": image_base64},
                        }
                    },
                    {
                        "text": prompt
                    }
                ],
            }
        ]
    }
)
response = bedrock_runtime.invoke_model(
    modelId='amazon.nova-pro-v1:0',
    body=body
)

response_body = json.loads(response['body'].read())
print(response_body)
response_body = response_body['output']['message']['content'][0]['text']
print(response_body)
matches = json.loads(response_body)
print(matches)
matched_indices = [int(match['imageIndexNo']) for match in matches]
print(matched_indices)

{'output': {'message': {'content': [{'text': '[\n  {\n    "imageIndexNo": "6",\n    "reason": "The query image on the left shows a leopard pattern, which closely matches the pattern on the phone case in cell 6 on the right grid. The leopard spots in both images are similar in size, shape, and distribution, making cell 6 the best match for the user\'s query."\n  }\n]'}], 'role': 'assistant'}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 1572, 'outputTokens': 73, 'totalTokens': 1645, 'cacheReadInputTokenCount': None, 'cacheWriteInputTokenCount': None}}
[
  {
    "imageIndexNo": "6",
    "reason": "The query image on the left shows a leopard pattern, which closely matches the pattern on the phone case in cell 6 on the right grid. The leopard spots in both images are similar in size, shape, and distribution, making cell 6 the best match for the user's query."
  }
]
[{'imageIndexNo': '6', 'reason': "The query image on the left shows a leopard pattern, which closely matches the patter

In [59]:
query_text = 'find a phone case with similar pattern'
prompt = f"""You are an AI assistant tasked with identifying the cell in an image that best matches a user's query. The image shows two parts: on the left is the query image, and on the right is a grid of numbered cells.

Your goal is to review the user's query ({query_text}) and both images, then identify the cell from the right grid that best matches the query image and text. You should provide your response in JSON format, with the following structure:

[
  {{
    "imageIndexNo": "the index number of the matching cell",
    "reason": "a brief explanation of why this cell is the best match"
  }}
]

To accomplish this task:

1. Carefully review the user's query ({query_text}) and the query image on the left.
2. Examine the grid on the right and identify the cell that best matches based on visual features and the query.
3. Determine the index number of the matching cell.
4. Write a brief explanation of why this cell is the best match.
5. Format your response in the JSON structure specified above.

Be as specific and accurate as possible in your response. If you are unable to identify a clear match, explain why in your response."""
image_base64 = image_to_base64("/Users/fangyili/Downloads/Picture1.png")
body = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1024,
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": image_base64
                    }
                },
                {
                    "type": "text",
                    "text": prompt
                }
            ]
        }
    ]
}

response = bedrock_runtime.invoke_model(
    modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
    body=json.dumps(body)
)
        
response_body = json.loads(response['body'].read())
print(response_body)

{'id': 'msg_bdrk_01M3rQFtM4kV5t5eTeqE5LXc', 'type': 'message', 'role': 'assistant', 'model': 'claude-3-5-sonnet-20240620', 'content': [{'type': 'text', 'text': 'Based on the query to find a phone case with a similar pattern to the image on the left, which appears to show a leopard, I\'ve analyzed the grid of phone cases on the right. Here\'s my response in the requested JSON format:\n\n[\n  {\n    "imageIndexNo": "7",\n    "reason": "Cell 7 shows a phone case with a blue leopard print pattern, which is the closest match to the leopard pattern in the query image. While the colors are different (blue instead of the natural leopard colors), the distinctive spotted pattern of a leopard\'s coat is clearly represented, making this the best match for the requested similar pattern."\n  }\n]'}], 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 509, 'output_tokens': 150}}


In [63]:
response = response_body['content'][0]['text']
response

'Based on the query to find a phone case with a similar pattern to the image on the left, which appears to show a leopard, I\'ve analyzed the grid of phone cases on the right. Here\'s my response in the requested JSON format:\n\n[\n  {\n    "imageIndexNo": "7",\n    "reason": "Cell 7 shows a phone case with a blue leopard print pattern, which is the closest match to the leopard pattern in the query image. While the colors are different (blue instead of the natural leopard colors), the distinctive spotted pattern of a leopard\'s coat is clearly represented, making this the best match for the requested similar pattern."\n  }\n]'

In [62]:
matches = json.loads(response)
matched_indices = [int(match['imageIndexNo']) for match in matches]

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# Test batch upload

In [44]:
import boto3
import base64
import datetime
import uuid
import logging
import jsonlines

# Configure logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

s3_client = boto3.client('s3')
paginator = s3_client.get_paginator('list_objects_v2')
first = True
next_token = None
# Initialization: Initialize a bedrock client
client = boto3.client('bedrock')
# # Initialization: Create S3 directory
# s3_client.put_object(
#     Bucket='imagebucket-442042528912-us-east-1',
#     Key='INVOCATION-INPUT-NO-IMAGE/'
# )
# s3_client.put_object(
#     Bucket='imagebucket-442042528912-us-east-1',
#     Key='INVOCATION-OUTPUT-NO-IMAGE/'
# )
# Initialization: Invocation job configuration
discriptionGeneratorInputDataConfig=({
    "s3InputDataConfig": {
        "s3Uri": f"s3://imagebucket-442042528912-us-east-1/IONVOCATION-INPUT-NO-IMAGE/descn-input.jsonl"
    }
})
embeddingGeneratorInputDataConfig=({
    "s3InputDataConfig": {
        "s3Uri": f"s3://imagebucket-442042528912-us-east-1/IONVOCATION-INPUT-NO-IMAGE/embedding-input.jsonl"
    }
})
discriptionGeneratorOutputDataConfig=({
    "s3OutputDataConfig": {
        "s3Uri": f"s3://imagebucket-442042528912-us-east-1/INVOCATION-OUTPUT-NO-IMAGE/{str(uuid.uuid4())}-descn/"
    }
})
embeddingGeneratorOutputDataConfig=({
    "s3OutputDataConfig": {
        "s3Uri": f"s3://imagebucket-442042528912-us-east-1/INVOCATION-OUTPUT-NO-IMAGE/{str(uuid.uuid4())}-embedding/"
    }
})
# iterate all uploaded objects
while first or next_token:
    # Create a PageIterator from the Paginator
    page_iterator = paginator.paginate(
        Bucket='imagebucket-442042528912-us-east-1',
        Prefix='images',
        PaginationConfig={'PageSize':10,'StartingToken':next_token}
    )
    for page in page_iterator:
        # one upload batch
        documents = []
        descn_gen_batch_inference_data = []
        embedding_gen_batch_inference_data = []
        for obj in page['Contents']:
            # Get image base64
            s3_key = obj['Key']
            response = s3_client.get_object(Bucket='imagebucket-442042528912-us-east-1',Key=s3_key)
            streaming_body = response.get("Body").read()
            image_base64 = base64.b64encode(streaming_body).decode('utf-8')
            image_id = str(uuid.uuid4())
            description = ''
            # Construct description generation payload
            descn_gen_payload = {
                "recordId": str(uuid.uuid4()), 
                "modelInput": {
                    "schemaVersion": "messages-v1",
                    "inferenceConfig": {"max_new_tokens": 5000},
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "image": {
                                        "format": "jpg",
                                        "source": {"bytes": image_base64},
                                    }
                                },
                                {
                                    "text": "Generate description for the image"
                                }
                            ],
                        }
                    ]
                }
            }
            descn_gen_batch_inference_data.append(descn_gen_payload)
            # Construct embedding generation payload
            embedding_gen_payload = {
                "recordId": str(uuid.uuid4()),
                "modelInput":{
                    "inputText": '',
                    "inputImage": image_base64,
                    "embeddingConfig": {
                        "outputEmbeddingLength": 1024
                    }
                }
            }
            embedding_gen_batch_inference_data.append(embedding_gen_payload)
            # Construct document
            dt = datetime.datetime.now().isoformat()
            document = {
                'id': image_id,
                'description': description,
                'embedding': '',
                'createtime': dt,
                'image_path': s3_key
            }
            documents.append(document)
        # bulk upload
        # Invocation job to generate description
        with jsonlines.open('descn-input.jsonl', 'w') as writer:
            writer.write_all(descn_gen_batch_inference_data)
        s3_client.upload_file('descn-input.jsonl', 'imagebucket-442042528912-us-east-1', 'IONVOCATION-INPUT-NO-IMAGE/descn-input.jsonl')
        descn_gen_response = client.create_model_invocation_job(
            roleArn='arn:aws:iam::442042528912:role/bedrockBatchJobTestRole',
            modelId='amazon.nova-pro-v1:0',
            jobName="generate-description-batch-job",
            inputDataConfig=discriptionGeneratorInputDataConfig,
            outputDataConfig=discriptionGeneratorOutputDataConfig
        )
        # Invocation job to generate embedding
        with jsonlines.open('embedding-input.jsonl', 'w') as writer:
            writer.write_all(embedding_gen_batch_inference_data)
        s3_client.upload_file('embedding-input.jsonl', 'imagebucket-442042528912-us-east-1', 'IONVOCATION-INPUT-NO-IMAGE/embedding-input.jsonl')
        embedding_gen_response = client.create_model_invocation_job(
            roleArn='arn:aws:iam::442042528912:role/bedrockBatchJobTestRole',
            modelId='amazon.titan-embed-image-v1',
            jobName="generate-embedding-batch-job",
            inputDataConfig=embeddingGeneratorInputDataConfig,
            outputDataConfig=embeddingGeneratorOutputDataConfig
        )
        break
    break
        # print(page['Contents'])

In [29]:
print(boto3.__version__)

1.35.49


In [45]:
client.list_model_invocation_jobs()

{'ResponseMetadata': {'RequestId': 'fbb72755-948b-4187-ac4f-485657f8f719',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sun, 08 Dec 2024 11:48:18 GMT',
   'content-type': 'application/json',
   'content-length': '1894',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'fbb72755-948b-4187-ac4f-485657f8f719'},
  'RetryAttempts': 0},
 'invocationJobSummaries': [{'jobArn': 'arn:aws:bedrock:us-east-1:442042528912:model-invocation-job/d2bneg2cnyvd',
   'jobName': 'generate-embedding-batch-job',
   'modelId': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-image-v1:0',
   'roleArn': 'arn:aws:iam::442042528912:role/bedrockBatchJobTestRole',
   'status': 'Validating',
   'submitTime': datetime.datetime(2024, 12, 8, 11, 46, 14, 809000, tzinfo=tzutc()),
   'lastModifiedTime': datetime.datetime(2024, 12, 8, 11, 46, 58, 543000, tzinfo=tzutc()),
   'inputDataConfig': {'s3InputDataConfig': {'s3Uri': 's3://imagebucket-442042528912-us-east-1/IONVOCATION-INPUT-NO-IMAGE/embedd

In [48]:
client.get_model_invocation_job(jobIdentifier='arn:aws:bedrock:us-east-1:442042528912:model-invocation-job/d2bneg2cnyvd')['status']

'Failed'

In [49]:
client.get_model_invocation_job(jobIdentifier='arn:aws:bedrock:us-east-1:442042528912:model-invocation-job/d2bneg2cnyvd')

{'ResponseMetadata': {'RequestId': 'f1fb4612-db12-44a3-b988-30715d912e43',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sun, 08 Dec 2024 11:50:23 GMT',
   'content-type': 'application/json',
   'content-length': '1102',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'f1fb4612-db12-44a3-b988-30715d912e43'},
  'RetryAttempts': 0},
 'jobArn': 'arn:aws:bedrock:us-east-1:442042528912:model-invocation-job/d2bneg2cnyvd',
 'jobName': 'generate-embedding-batch-job',
 'modelId': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-image-v1:0',
 'clientRequestToken': '81cfb034-ac93-48bd-969d-d112dfc33ad5',
 'roleArn': 'arn:aws:iam::442042528912:role/bedrockBatchJobTestRole',
 'status': 'Failed',
 'message': 'Batch job arn:aws:bedrock:us-east-1:442042528912:model-invocation-job/d2bneg2cnyvd contains less records (10) than the required minimum of: 100',
 'submitTime': datetime.datetime(2024, 12, 8, 11, 46, 14, 809000, tzinfo=tzutc()),
 'lastModifiedTime': datetime.datetime(